net/base/net_util.cc - Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_...

Side by Side Diff: net/base/net_util.cc

Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <unicode/regex.h>	7 #include <unicode/regex.h>

8 #include <unicode/ucnv.h>	8 #include <unicode/ucnv.h>

9 #include <unicode/uidna.h>	9 #include <unicode/uidna.h>

10 #include <unicode/ulocdata.h>	10 #include <unicode/ulocdata.h>

(...skipping 137 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
148 0xFFFF, // Used to block all invalid port numbers (see	148 0xFFFF, // Used to block all invalid port numbers (see

149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())	149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())

150 };	150 };

151	151

152 // FTP overrides the following restricted ports.	152 // FTP overrides the following restricted ports.

153 static const int kAllowedFtpPorts[] = {	153 static const int kAllowedFtpPorts[] = {

154 21, // ftp data	154 21, // ftp data

155 22, // ssh	155 22, // ssh

156 };	156 };

157	157

158 template<typename STR>

159 STR GetSpecificHeaderT(const STR& headers, const STR& name) {

160 // We want to grab the Value from the "Key: Value" pairs in the headers,

161 // which should look like this (no leading spaces, \n-separated) (we format

162 // them this way in url_request_inet.cc):

163 // HTTP/1.1 200 OK\n

164 // ETag: "6d0b8-947-24f35ec0"\n

165 // Content-Length: 2375\n

166 // Content-Type: text/html; charset=UTF-8\n

167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n

168 if (headers.empty())

169 return STR();

170

171 STR match;

172 match.push_back('\n');

173 match.append(name);

174 match.push_back(':');

175

176 typename STR::const_iterator begin =

177 search(headers.begin(), headers.end(), match.begin(), match.end(),

178 base::CaseInsensitiveCompareASCII<typename STR::value_type>());

179

180 if (begin == headers.end())

181 return STR();

182

183 begin += match.length();

184

185 typename STR::const_iterator end = find(begin, headers.end(), '\n');

186

187 STR ret;

188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret);

189 return ret;

190 }

191

192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence	158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence

193 // of bytes. If input is invalid, return false.	159 // of bytes. If input is invalid, return false.

194 bool QPDecode(const std::string& input, std::string* output) {	160 bool QPDecode(const std::string& input, std::string* output) {

195 std::string temp;	161 std::string temp;

196 temp.reserve(input.size());	162 temp.reserve(input.size());

197 std::string::const_iterator it = input.begin();	163 std::string::const_iterator it = input.begin();

198 while (it != input.end()) {	164 while (it != input.end()) {

199 if (*it == '_') {	165 if (*it == '_') {

200 temp.push_back(' ');	166 temp.push_back(' ');

201 } else if (*it == '=') {	167 } else if (*it == '=') {

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
269 *is_rfc2047 = false;	235 *is_rfc2047 = false;

270 output->clear();	236 output->clear();

271 if (encoded_word.empty())	237 if (encoded_word.empty())

272 return true;	238 return true;

273	239

274 if (!IsStringASCII(encoded_word)) {	240 if (!IsStringASCII(encoded_word)) {

275 // Try UTF-8, referrer_charset and the native OS default charset in turn.	241 // Try UTF-8, referrer_charset and the native OS default charset in turn.

276 if (IsStringUTF8(encoded_word)) {	242 if (IsStringUTF8(encoded_word)) {

277 *output = encoded_word;	243 *output = encoded_word;

278 } else {	244 } else {

279 std::wstring wide_output;	245 string16 utf16_output;

280 if (!referrer_charset.empty() &&	246 if (!referrer_charset.empty() &&

281 base::CodepageToWide(encoded_word, referrer_charset.c_str(),	247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),

282 base::OnStringConversionError::FAIL,	248 base::OnStringConversionError::FAIL,

283 &wide_output)) {	249 &utf16_output)) {

284 *output = WideToUTF8(wide_output);	250 *output = UTF16ToUTF8(utf16_output);

285 } else {	251 } else {

286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));	252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));

287 }	253 }

288 }	254 }

289	255

290 return true;	256 return true;

291 }	257 }

292	258

293 // RFC 2047 : one of encoding methods supported by Firefox and relatively	259 // RFC 2047 : one of encoding methods supported by Firefox and relatively

294 // widely used by web servers.	260 // widely used by web servers.

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 std::string decoded;	373 std::string decoded;

408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,	374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

409 &decoded))	375 &decoded))

410 return false;	376 return false;

411 tmp.append(decoded);	377 tmp.append(decoded);

412 }	378 }

413 output->swap(tmp);	379 output->swap(tmp);

414 return true;	380 return true;

415 }	381 }

416	382

417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

418 // sure this doesn't properly handle all (most?) cases.

419 template<typename STR>

420 STR GetHeaderParamValueT(const STR& header, const STR& param_name,

421 QuoteRule::Type quote_rule) {

422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".

423 typename STR::const_iterator param_begin =

424 search(header.begin(), header.end(), param_name.begin(), param_name.end(),

425 base::CaseInsensitiveCompareASCII<typename STR::value_type>());

426

427 if (param_begin == header.end())

428 return STR();

429 param_begin += param_name.length();

430

431 STR whitespace;

432 whitespace.push_back(' ');

433 whitespace.push_back('\t');

434 const typename STR::size_type equals_offset =

435 header.find_first_not_of(whitespace, param_begin - header.begin());

436 if (equals_offset == STR::npos \|\| header.at(equals_offset) != '=')

437 return STR();

438

439 param_begin = header.begin() + equals_offset + 1;

440 if (param_begin == header.end())

441 return STR();

442

443 typename STR::const_iterator param_end;

444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {

445 ++param_begin; // skip past the quote.

446 param_end = find(param_begin, header.end(), '"');

447 // If the closing quote is missing, we will treat the rest of the

448 // string as the parameter. We can't set \|param_end\| to the

449 // location of the separator (';'), since the separator is

450 // technically quoted. See: http://crbug.com/58840

451 } else {

452 param_end = find(param_begin+1, header.end(), ';');

453 }

454

455 return STR(param_begin, param_end);

456 }

457

458 // Does some simple normalization of scripts so we can allow certain scripts	383 // Does some simple normalization of scripts so we can allow certain scripts

459 // to exist together.	384 // to exist together.

460 // TODO(brettw) bug 880223: we should allow some other languages to be	385 // TODO(brettw) bug 880223: we should allow some other languages to be

461 // combined such as Chinese and Latin. We will probably need a more	386 // combined such as Chinese and Latin. We will probably need a more

462 // complicated system of language pairs to have more fine-grained control.	387 // complicated system of language pairs to have more fine-grained control.

463 UScriptCode NormalizeScript(UScriptCode code) {	388 UScriptCode NormalizeScript(UScriptCode code) {

464 switch (code) {	389 switch (code) {

465 case USCRIPT_KATAKANA:	390 case USCRIPT_KATAKANA:

466 case USCRIPT_HIRAGANA:	391 case USCRIPT_HIRAGANA:

467 case USCRIPT_KATAKANA_OR_HIRAGANA:	392 case USCRIPT_KATAKANA_OR_HIRAGANA:

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
586 ulocdata_close(uld);	511 ulocdata_close(uld);

587 }	512 }

588 }	513 }

589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);	514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);

590 }	515 }

591	516

592 // Returns true if the given Unicode host component is safe to display to the	517 // Returns true if the given Unicode host component is safe to display to the

593 // user.	518 // user.

594 bool IsIDNComponentSafe(const char16* str,	519 bool IsIDNComponentSafe(const char16* str,

595 int str_len,	520 int str_len,

596 const std::wstring& languages) {	521 const std::string& languages) {

597 // Most common cases (non-IDN) do not reach here so that we don't	522 // Most common cases (non-IDN) do not reach here so that we don't

598 // need a fast return path.	523 // need a fast return path.

599 // TODO(jungshik) : Check if there's any character inappropriate	524 // TODO(jungshik) : Check if there's any character inappropriate

600 // (although allowed) for domain names.	525 // (although allowed) for domain names.

601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and	526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and

602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt	527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt

603 // For now, we borrow the list from Mozilla and tweaked it slightly.	528 // For now, we borrow the list from Mozilla and tweaked it slightly.

604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because	529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because

605 // they're gonna be canonicalized to U+0020 and full stop before	530 // they're gonna be canonicalized to U+0020 and full stop before

606 // reaching here.)	531 // reaching here.)

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
670 // (sync'd with characters allowed in url_canon_host with square	595 // (sync'd with characters allowed in url_canon_host with square

671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.	596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.

672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),	597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),

673 status);	598 status);

674 DCHECK(U_SUCCESS(status));	599 DCHECK(U_SUCCESS(status));

675 // Subtract common characters because they're always allowed so that	600 // Subtract common characters because they're always allowed so that

676 // we just have to check if a language-specific set contains	601 // we just have to check if a language-specific set contains

677 // the remainder.	602 // the remainder.

678 component_characters.removeAll(common_characters);	603 component_characters.removeAll(common_characters);

679	604

680 std::string languages_list(WideToASCII(languages));	605 StringTokenizer t(languages, ",");

681 StringTokenizer t(languages_list, ",");

682 while (t.GetNext()) {	606 while (t.GetNext()) {

683 if (IsComponentCoveredByLang(component_characters, t.token()))	607 if (IsComponentCoveredByLang(component_characters, t.token()))

684 return true;	608 return true;

685 }	609 }

686 return false;	610 return false;

687 }	611 }

688	612

689 // Converts one component of a host (between dots) to IDN if safe. The result	613 // Converts one component of a host (between dots) to IDN if safe. The result

690 // will be APPENDED to the given output string and will be the same as the input	614 // will be APPENDED to the given output string and will be the same as the input

691 // if it is not IDN or the IDN is unsafe to display. Returns whether any	615 // if it is not IDN or the IDN is unsafe to display. Returns whether any

692 // conversion was performed.	616 // conversion was performed.

693 bool IDNToUnicodeOneComponent(const char16* comp,	617 bool IDNToUnicodeOneComponent(const char16* comp,

694 size_t comp_len,	618 size_t comp_len,

695 const std::wstring& languages,	619 const std::string& languages,

696 string16* out) {	620 string16* out) {

697 DCHECK(out);	621 DCHECK(out);

698 if (comp_len == 0)	622 if (comp_len == 0)

699 return false;	623 return false;

700	624

701 // Only transform if the input can be an IDN component.	625 // Only transform if the input can be an IDN component.

702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

703 if ((comp_len > arraysize(kIdnPrefix)) &&	627 if ((comp_len > arraysize(kIdnPrefix)) &&

704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {	628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {

705 // Repeatedly expand the output string until it's big enough. It looks like	629 // Repeatedly expand the output string until it's big enough. It looks like

(...skipping 21 matching lines...) Expand all Loading...
727 // Failed, revert back to original string.	651 // Failed, revert back to original string.

728 out->resize(original_length);	652 out->resize(original_length);

729 }	653 }

730	654

731 // We get here with no IDN or on error, in which case we just append the	655 // We get here with no IDN or on error, in which case we just append the

732 // literal input.	656 // literal input.

733 out->append(comp, comp_len);	657 out->append(comp, comp_len);

734 return false;	658 return false;

735 }	659 }

736	660

737 struct SubtractFromOffset {	661 // Clamps the offsets in \|offsets_for_adjustment\| to the length of \|str\|.

738 explicit SubtractFromOffset(size_t amount)	662 void LimitOffsets(const string16& str,

739 : amount(amount) {}	663 std::vector<size_t>* offsets_for_adjustment) {

740 void operator()(size_t& offset) {	664 if (offsets_for_adjustment) {

741 if (offset != std::wstring::npos) {	665 std::for_each(offsets_for_adjustment->begin(),

742 if (offset >= amount)	666 offsets_for_adjustment->end(),

743 offset -= amount;	667 LimitOffset<string16>(str.length()));

744 else	668 }

745 offset = std::wstring::npos;	669 }

	670

	671 // TODO(brettw) bug 734373: check the scripts for each host component and

	672 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

	673 // scripts that the user has installed. For now, just put the entire

	674 // path through IDN. Maybe this feature can be implemented in ICU itself?

	675 //

	676 // We may want to skip this step in the case of file URLs to allow unicode

	677 // UNC hostnames regardless of encodings.

	678 string16 IDNToUnicodeWithOffsets(const std::string& host,

	679 const std::string& languages,

	680 std::vector<size_t>* offsets_for_adjustment) {

	681 // Convert the ASCII input to a string16 for ICU.

	682 string16 input16;

	683 input16.reserve(host.length());

	684 input16.insert(input16.end(), host.begin(), host.end());

	685

	686 // Do each component of the host separately, since we enforce script matching

	687 // on a per-component basis.

	688 string16 out16;

	689 {

	690 OffsetAdjuster offset_adjuster(offsets_for_adjustment);

	691 for (size_t component_start = 0, component_end;

	692 component_start < input16.length();

	693 component_start = component_end + 1) {

	694 // Find the end of the component.

	695 component_end = input16.find('.', component_start);

	696 if (component_end == string16::npos)

	697 component_end = input16.length(); // For getting the last component.

	698 size_t component_length = component_end - component_start;

	699 size_t new_component_start = out16.length();

	700 bool converted_idn = false;

	701 if (component_end > component_start) {

	702 // Add the substring that we just found.

	703 converted_idn = IDNToUnicodeOneComponent(

	704 input16.data() + component_start, component_length, languages,

	705 &out16);

	706 }

	707 size_t new_component_length = out16.length() - new_component_start;

	708

	709 if (converted_idn && offsets_for_adjustment) {

	710 offset_adjuster.Add(OffsetAdjuster::Adjustment(component_start,

	711 component_length, new_component_length));

	712 }

	713

	714 // Need to add the dot we just found (if we found one).

	715 if (component_end < input16.length())

	716 out16.push_back('.');

746 }	717 }

747 }	718 }

748	719

749 size_t amount;	720 LimitOffsets(out16, offsets_for_adjustment);

750 };	721 return out16;

751

752 struct AddToOffset {

753 explicit AddToOffset(size_t amount)

754 : amount(amount) {}

755 void operator()(size_t& offset) {

756 if (offset != std::wstring::npos)

757 offset += amount;

758 }

759

760 size_t amount;

761 };

762

763 std::vector<size_t> OffsetsIntoSection(

764 std::vector<size_t>* offsets_for_adjustment,

765 size_t section_begin) {

766 std::vector<size_t> offsets_into_section;

767 if (offsets_for_adjustment) {

768 std::transform(offsets_for_adjustment->begin(),

769 offsets_for_adjustment->end(),

770 std::back_inserter(offsets_into_section),

771 ClampComponentOffset(section_begin));

772 std::for_each(offsets_into_section.begin(), offsets_into_section.end(),

773 SubtractFromOffset(section_begin));

774 }

775 return offsets_into_section;

776 }	722 }

777	723

778 void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section,	724 // Transforms \|original_offsets\| by subtracting \|section_begin\| from all
	brettw 2011/04/27 17:47:51 section_begin -> component_begin
779 std::vector<size_t>* offsets_for_adjustment,	725 // offsets. Any offset which was not at least this large to begin with is set

780 size_t old_section_len,	726 // to std::string::npos.

781 size_t new_section_len,	727 std::vector<size_t> OffsetsIntoComponent(

782 size_t section_begin) {	728 const std::vector<size_t>& original_offsets,

783 if (offsets_for_adjustment) {	729 size_t component_begin) {

784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());	730 DCHECK_NE(std::string::npos, component_begin);

785 std::vector<size_t>::const_iterator host_offsets_iter =	731 std::vector<size_t> offsets_into_component(original_offsets);

786 offsets_into_section.begin();	732 for (std::vector<size_t>::iterator i(offsets_into_component.begin());

787 for (std::vector<size_t>::iterator offsets_iter =	733 i != offsets_into_component.end(); ++i) {

788 offsets_for_adjustment->begin();	734 if (*i != std::string::npos)

789 offsets_iter != offsets_for_adjustment->end();	735 i = (i < component_begin) ? std::string::npos : (*i - component_begin);

790 ++offsets_iter, ++host_offsets_iter) {	736 }

791 size_t offset = *offsets_iter;	737 return offsets_into_component;

792 if (offset == std::wstring::npos \|\| offset < section_begin) {	738 }

793 // The offset is before the host section so leave it as is.	739

794 continue;	740 // Called after we transform a component and append it to an output string.

795 }	741 // Maps \|transformed_offsets\|, which represent offsets into the transformed

796 if (offset >= section_begin + old_section_len) {	742 // component itself, into appropriate offsets for the output string, by adding

797 // The offset is after the host section so adjust by host length delta.	743 // \|output_component_begin\| to each. Determines which offsets need mapping by

798 offset += new_section_len - old_section_len;	744 // checking to see which of the \|original_offsets\| were within the designated

799 } else if (*host_offsets_iter != std::wstring::npos) {	745 // original component, using its provided endpoints.

800 // The offset is within the host and valid so adjust by the host	746 void AdjustForComponentTransform(

801 // reformatting offsets results.	747 const std::vector<size_t>& original_offsets,

802 offset = section_begin + *host_offsets_iter;	748 size_t original_component_begin,

803 } else {	749 size_t original_component_end,

804 // The offset is invalid.	750 const std::vector<size_t>& transformed_offsets,

805 offset = std::wstring::npos;	751 size_t output_component_begin,

806 }	752 std::vector<size_t>* offsets_for_adjustment) {

807 *offsets_iter = offset;	753 if (!offsets_for_adjustment)

	754 return;

	755

	756 DCHECK_NE(std::string::npos, original_component_begin);

	757 DCHECK_NE(std::string::npos, original_component_end);

	758 DCHECK_NE(string16::npos, output_component_begin);

	759 size_t offsets_size = offsets_for_adjustment->size();

	760 DCHECK_EQ(offsets_size, original_offsets.size());

	761 DCHECK_EQ(offsets_size, transformed_offsets.size());

	762 for (size_t i = 0; i < offsets_size; ++i) {

	763 size_t original_offset = original_offsets[i];

	764 if ((original_offset >= original_component_begin) &&

	765 (original_offset < original_component_end)) {

	766 size_t transformed_offset = transformed_offsets[i];

	767 (*offsets_for_adjustment)[i] = (transformed_offset == string16::npos) ?

	768 string16::npos : (output_component_begin + transformed_offset);

808 }	769 }

809 }	770 }

810 }	771 }

811	772

812 // If \|component\| is valid, its begin is incremented by \|delta\|.	773 // If \|component\| is valid, its begin is incremented by \|delta\|.

813 void AdjustComponent(int delta, url_parse::Component* component) {	774 void AdjustComponent(int delta, url_parse::Component* component) {

814 if (!component->is_valid())	775 if (!component->is_valid())

815 return;	776 return;

816	777

817 DCHECK(delta >= 0 \|\| component->begin >= -delta);	778 DCHECK(delta >= 0 \|\| component->begin >= -delta);

818 component->begin += delta;	779 component->begin += delta;

819 }	780 }

820	781

821 // Adjusts all the components of \|parsed\| by \|delta\|, except for the scheme.	782 // Adjusts all the components of \|parsed\| by \|delta\|, except for the scheme.

822 void AdjustComponents(int delta, url_parse::Parsed* parsed) {	783 void AdjustComponents(int delta, url_parse::Parsed* parsed) {

823 AdjustComponent(delta, &(parsed->username));	784 AdjustComponent(delta, &(parsed->username));

824 AdjustComponent(delta, &(parsed->password));	785 AdjustComponent(delta, &(parsed->password));

825 AdjustComponent(delta, &(parsed->host));	786 AdjustComponent(delta, &(parsed->host));

826 AdjustComponent(delta, &(parsed->port));	787 AdjustComponent(delta, &(parsed->port));

827 AdjustComponent(delta, &(parsed->path));	788 AdjustComponent(delta, &(parsed->path));

828 AdjustComponent(delta, &(parsed->query));	789 AdjustComponent(delta, &(parsed->query));

829 AdjustComponent(delta, &(parsed->ref));	790 AdjustComponent(delta, &(parsed->ref));

830 }	791 }

831	792

832 std::wstring FormatUrlInternal(const GURL& url,	793 // Helper for FormatUrlWithOffsets().

833 const std::wstring& languages,	794 string16 FormatViewSourceUrl(const GURL& url,

834 FormatUrlTypes format_types,	795 const std::vector<size_t>& original_offsets,

835 UnescapeRule::Type unescape_rules,	796 const std::string& languages,

836 url_parse::Parsed* new_parsed,	797 FormatUrlTypes format_types,

837 size_t* prefix_end,	798 UnescapeRule::Type unescape_rules,

838 std::vector<size_t>* offsets_for_adjustment);	799 url_parse::Parsed* new_parsed,

	800 size_t* prefix_end,

	801 std::vector<size_t>* offsets_for_adjustment) {

	802 DCHECK(new_parsed);

	803 const char kViewSource[] = "view-source:";

	804 const size_t kViewSourceLength = arraysize(kViewSource) - 1;

	805 std::vector<size_t> offsets_into_url(

	806 OffsetsIntoComponent(original_offsets, kViewSourceLength));

839	807

840 // Helper for FormatUrl()/FormatUrlInternal().	808 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));

841 std::wstring FormatViewSourceUrl(const GURL& url,	809 string16 result(ASCIIToUTF16(kViewSource) +

842 const std::wstring& languages,	810 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules,

843 FormatUrlTypes format_types,	811 new_parsed, prefix_end, &offsets_into_url));

844 UnescapeRule::Type unescape_rules,

845 url_parse::Parsed* new_parsed,

846 size_t* prefix_end,

847 std::vector<size_t>* offsets_for_adjustment) {

848 DCHECK(new_parsed);

849 DCHECK(offsets_for_adjustment);

850 const wchar_t* const kWideViewSource = L"view-source:";

851 const size_t kViewSourceLengthPlus1 = 12;

852 std::vector<size_t> saved_offsets(*offsets_for_adjustment);

853

854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));

855 // Clamp the offsets to the source area.

856 std::for_each(offsets_for_adjustment->begin(),

857 offsets_for_adjustment->end(),

858 SubtractFromOffset(kViewSourceLengthPlus1));

859 std::wstring result = FormatUrlInternal(real_url, languages, format_types,

860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

861 result.insert(0, kWideViewSource);

862	812

863 // Adjust position values.	813 // Adjust position values.

864 if (new_parsed->scheme.is_nonempty()) {	814 if (new_parsed->scheme.is_nonempty()) {

865 // Assume "view-source:real-scheme" as a scheme.	815 // Assume "view-source:real-scheme" as a scheme.

866 new_parsed->scheme.len += kViewSourceLengthPlus1;	816 new_parsed->scheme.len += kViewSourceLength;

867 } else {	817 } else {

868 new_parsed->scheme.begin = 0;	818 new_parsed->scheme.begin = 0;

869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;	819 new_parsed->scheme.len = kViewSourceLength - 1;

870 }	820 }

871 AdjustComponents(kViewSourceLengthPlus1, new_parsed);	821 AdjustComponents(kViewSourceLength, new_parsed);

872 if (prefix_end)	822 if (prefix_end)

873 *prefix_end += kViewSourceLengthPlus1;	823 *prefix_end += kViewSourceLength;

874 std::for_each(offsets_for_adjustment->begin(),	824 AdjustForComponentTransform(original_offsets, kViewSourceLength,

875 offsets_for_adjustment->end(),	825 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength,

876 AddToOffset(kViewSourceLengthPlus1));	826 offsets_for_adjustment);

877 // Restore all offsets which were not affected by FormatUrlInternal.	827 LimitOffsets(result, offsets_for_adjustment);

878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());

879 for (size_t i = 0; i < saved_offsets.size(); ++i) {

880 if (saved_offsets[i] < kViewSourceLengthPlus1)

881 (*offsets_for_adjustment)[i] = saved_offsets[i];

882 }

883 return result;	828 return result;

884 }	829 }

885	830

886 // Appends the substring \|in_component\| inside of the URL \|spec\| to \|output\|,	831 class AppendComponentTransform {

887 // and the resulting range will be filled into \|out_component\|. \|unescape_rules\|	832 public:

888 // defines how to clean the URL for human readability. \|offsets_for_adjustment\|	833 AppendComponentTransform() {}

889 // is an array of offsets into \|output\| each of which will be adjusted based on	834 virtual ~AppendComponentTransform() {}

890 // how it maps to the component being converted; if it is less than	835

891 // output->length(), it will be untouched, and if it is greater than	836 virtual string16 Execute(

892 // output->length() + in_component.len it will be adjusted by the difference in	837 const std::string& component_text,

893 // lengths between the input and output components. Otherwise it points into	838 std::vector<size_t>* offsets_into_component) const = 0;

894 // the component being converted, and is adjusted to point to the same logical	839

895 // place in \|output\|. \|offsets_for_adjustment\| may not be NULL.	840 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an

	841 // accessible copy constructor in order to call AppendFormattedComponent()

	842 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).

	843 };

	844

	845 class HostComponentTransform : public AppendComponentTransform {

	846 public:

	847 explicit HostComponentTransform(const std::string& languages)

	848 : languages_(languages) {

	849 }

	850

	851 private:

	852 virtual string16 Execute(

	853 const std::string& component_text,

	854 std::vector<size_t>* offsets_into_component) const {

	855 return IDNToUnicodeWithOffsets(component_text, languages_,

	856 offsets_into_component);

	857 }

	858

	859 const std::string& languages_;

	860 };

	861

	862 class NonHostComponentTransform : public AppendComponentTransform {

	863 public:

	864 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)

	865 : unescape_rules_(unescape_rules) {

	866 }

	867

	868 private:

	869 virtual string16 Execute(

	870 const std::string& component_text,

	871 std::vector<size_t>* offsets_into_component) const {

	872 return (unescape_rules_ == UnescapeRule::NONE) ?

	873 UTF8ToUTF16AndAdjustOffsets(component_text, offsets_into_component) :

	874 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text,

	875 unescape_rules_, offsets_into_component);

	876 }

	877

	878 const UnescapeRule::Type unescape_rules_;

	879 };

	880

896 void AppendFormattedComponent(const std::string& spec,	881 void AppendFormattedComponent(const std::string& spec,

897 const url_parse::Component& in_component,	882 const url_parse::Component& original_component,

898 UnescapeRule::Type unescape_rules,	883 const std::vector<size_t>& original_offsets,

899 std::wstring* output,	884 const AppendComponentTransform& transform,

900 url_parse::Component* out_component,	885 string16* output,

	886 url_parse::Component* output_component,

901 std::vector<size_t>* offsets_for_adjustment) {	887 std::vector<size_t>* offsets_for_adjustment) {

902 DCHECK(output);	888 DCHECK(output);

903 DCHECK(offsets_for_adjustment);	889 if (original_component.is_nonempty()) {

904 if (in_component.is_nonempty()) {	890 size_t original_component_begin =

905 size_t component_begin = output->length();	891 static_cast<size_t>(original_component.begin);

906 out_component->begin = static_cast<int>(component_begin);	892 size_t output_component_begin = output->length();

	893 if (output_component)

	894 output_component->begin = static_cast<int>(output_component_begin);

907	895

908 // Compose a list of offsets within the component area.

909 std::vector<size_t> offsets_into_component =	896 std::vector<size_t> offsets_into_component =

910 OffsetsIntoSection(offsets_for_adjustment, component_begin);	897 OffsetsIntoComponent(original_offsets, original_component_begin);

	898 output->append(transform.Execute(std::string(spec, original_component_begin,

	899 static_cast<size_t>(original_component.len)), &offsets_into_component));

911	900

912 if (unescape_rules == UnescapeRule::NONE) {	901 if (output_component) {

913 output->append(UTF8ToWideAndAdjustOffsets(	902 output_component->len =

914 spec.substr(in_component.begin, in_component.len),	903 static_cast<int>(output->length() - output_component_begin);

915 &offsets_into_component));

916 } else {

917 output->append(UTF16ToWideHack(

918 UnescapeAndDecodeUTF8URLComponentWithOffsets(

919 spec.substr(in_component.begin, in_component.len), unescape_rules,

920 &offsets_into_component)));

921 }	904 }

922 size_t new_component_len = output->length() - component_begin;	905 AdjustForComponentTransform(original_offsets, original_component_begin,

923 out_component->len = static_cast<int>(new_component_len);	906 static_cast<size_t>(original_component.end()),

924	907 offsets_into_component, output_component_begin,

925 // Apply offset adjustments.	908 offsets_for_adjustment);

926 size_t old_component_len = static_cast<size_t>(in_component.len);	909 } else if (output_component) {

927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,	910 output_component->reset();

928 old_component_len, new_component_len, component_begin);

929 } else {

930 out_component->reset();

931 }	911 }

932 }	912 }

933	913

934 // TODO(viettrungluu): This is really the old-fashioned version, made internal.

935 // I need to really convert |FormatUrl()|.

936 std::wstring FormatUrlInternal(const GURL& url,

937 const std::wstring& languages,

938 FormatUrlTypes format_types,

939 UnescapeRule::Type unescape_rules,

940 url_parse::Parsed* new_parsed,

941 size_t* prefix_end,

942 std::vector<size_t>* offsets_for_adjustment) {

943 url_parse::Parsed parsed_temp;

944 if (!new_parsed)

945 new_parsed = &parsed_temp;

946 else

947 *new_parsed = url_parse::Parsed();

948

949 std::vector<size_t> offsets_temp;

950 if (!offsets_for_adjustment)

951 offsets_for_adjustment = &offsets_temp;

952

953 std::wstring url_string;

954

955 // Check for empty URLs or 0 available text width.

956 if (url.is_empty()) {

957 if (prefix_end)

958 *prefix_end = 0;

959 std::for_each(offsets_for_adjustment->begin(),

960 offsets_for_adjustment->end(),

961 LimitOffset<std::wstring>(0));

962 return url_string;

963 }

964

965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme

966 // because this library shouldn't depend on chrome.

967 const char* const kViewSource = "view-source";

968 // Reject "view-source:view-source:..." to avoid deep recursion.

969 const char* const kViewSourceTwice = "view-source:view-source:";

970 if (url.SchemeIs(kViewSource) &&

971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

972 return FormatViewSourceUrl(url, languages, format_types,

973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

974 }

975

976 // We handle both valid and invalid URLs (this will give us the spec

977 // regardless of validity).

978 const std::string& spec = url.possibly_invalid_spec();

979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

980 size_t spec_length = spec.length();

981 std::for_each(offsets_for_adjustment->begin(),

982 offsets_for_adjustment->end(),

983 LimitOffset<std::wstring>(spec_length));

984

985 // Copy everything before the username (the scheme and the separators.)

986 // These are ASCII.

987 url_string.insert(url_string.end(), spec.begin(),

988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,

989 true));

990

991 const wchar_t kHTTP[] = L"http://";

992 const char kFTP[] = "ftp.";

993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This

994 // means that if we trim "http://" off a URL whose host starts with "ftp." and

995 // the user inputs this into any field subject to fixup (which is basically

996 // all input fields), the meaning would be changed. (In fact, often the

997 // formatted URL is directly pre-filled into an input field.) For this reason

998 // we avoid stripping "http://" in this case.

999 bool omit_http =

1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) &&

1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);

1002

1003 new_parsed->scheme = parsed.scheme;

1004

1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {

1006 // Remove the username and password fields. We don't want to display those

1007 // to the user since they can be used for attacks,

1008 // e.g. "http://google.com:search@evil.ru/"

1009 new_parsed->username.reset();

1010 new_parsed->password.reset();

1011 // Update the offsets based on removed username and/or password.

1012 if (!offsets_for_adjustment->empty() &&

1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {

1014 AdjustOffset::Adjustments adjustments;

1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

1016 // The seeming off-by-one and off-by-two in these first two lines are to

1017 // account for the ':' after the username and '@' after the password.

1018 adjustments.push_back(AdjustOffset::Adjustment(

1019 static_cast<size_t>(parsed.username.begin),

1020 static_cast<size_t>(parsed.username.len + parsed.password.len +

1021 2), 0));

1022 } else {

1023 const url_parse::Component* nonempty_component =

1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

1025 // The seeming off-by-one in below is to account for the '@' after the

1026 // username/password.

1027 adjustments.push_back(AdjustOffset::Adjustment(

1028 static_cast<size_t>(nonempty_component->begin),

1029 static_cast<size_t>(nonempty_component->len + 1), 0));

1030 }

1031

1032 // Make offset adjustment.

1033 std::for_each(offsets_for_adjustment->begin(),

1034 offsets_for_adjustment->end(),

1035 AdjustOffset(adjustments));

1036 }

1037 } else {

1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,

1039 &new_parsed->username, offsets_for_adjustment);

1040 if (parsed.password.is_valid())

1041 url_string.push_back(':');

1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,

1043 &new_parsed->password, offsets_for_adjustment);

1044 if (parsed.username.is_valid() || parsed.password.is_valid())

1045 url_string.push_back('@');

1046 }

1047 if (prefix_end)

1048 *prefix_end = static_cast<size_t>(url_string.length());

1049

1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,

1051 offsets_for_adjustment);

1052

1053 // Port.

1054 if (parsed.port.is_nonempty()) {

1055 url_string.push_back(':');

1056 new_parsed->port.begin = url_string.length();

1057 url_string.insert(url_string.end(),

1058 spec.begin() + parsed.port.begin,

1059 spec.begin() + parsed.port.end());

1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

1061 } else {

1062 new_parsed->port.reset();

1063 }

1064

1065 // Path and query both get the same general unescape & convert treatment.

1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||

1067 !CanStripTrailingSlash(url)) {

1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,

1069 &new_parsed->path, offsets_for_adjustment);

1070 }

1071 if (parsed.query.is_valid())

1072 url_string.push_back('?');

1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,

1074 &new_parsed->query, offsets_for_adjustment);

1075

1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert.

1077 if (parsed.ref.is_valid()) {

1078 url_string.push_back('#');

1079 size_t ref_begin = url_string.length();

1080 new_parsed->ref.begin = static_cast<int>(ref_begin);

1081

1082 // Compose a list of offsets within the section.

1083 std::vector<size_t> offsets_into_ref =

1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin);

1085

1086 if (parsed.ref.len > 0) {

1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,

1088 parsed.ref.len),

1089 &offsets_into_ref));

1090 }

1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len);

1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin;

1093 new_parsed->ref.len = static_cast<int>(new_ref_len);

1094

1095 // Apply offset adjustments.

1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,

1097 old_ref_len, new_ref_len, ref_begin);

1098 }

1099

1100 // If we need to strip out http do it after the fact. This way we don't need

1101 // to worry about how offset_for_adjustment is interpreted.

1102 const size_t kHTTPSize = arraysize(kHTTP) - 1;

1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {

1104 url_string = url_string.substr(kHTTPSize);

1105 AdjustOffset::Adjustments adjustments;

1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));

1107 std::for_each(offsets_for_adjustment->begin(),

1108 offsets_for_adjustment->end(),

1109 AdjustOffset(adjustments));

1110 if (prefix_end)

1111 *prefix_end -= kHTTPSize;

1112

1113 // Adjust new_parsed.

1114 DCHECK(new_parsed->scheme.is_valid());

1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.

1116 new_parsed->scheme.reset();

1117 AdjustComponents(delta, new_parsed);

1118 }

1119

1120 return url_string;

1121 }

1122

1123 } // namespace	914 } // namespace

1124	915

1125 const FormatUrlType kFormatUrlOmitNothing = 0;	916 const FormatUrlType kFormatUrlOmitNothing = 0;

1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;	917 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;

1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;	918 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;

1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;	919 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;

1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |	920 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |

1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;	921 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;

1131	922

1132 // TODO(viettrungluu): We don't want non-POD globals; change this.	923 // TODO(viettrungluu): We don't want non-POD globals; change this.

(...skipping 23 matching lines...) Expand all Loading...
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));	947 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));

1157	948

1158 #if defined(OS_POSIX)	949 #if defined(OS_POSIX)

1159 ReplaceSubstringsAfterOffset(&url_string, 0,	950 ReplaceSubstringsAfterOffset(&url_string, 0,

1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));	951 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));

1161 #endif	952 #endif

1162	953

1163 return GURL(url_string);	954 return GURL(url_string);

1164 }	955 }

1165	956

1166 std::wstring GetSpecificHeader(const std::wstring& headers,

1167 const std::wstring& name) {

1168 return GetSpecificHeaderT(headers, name);

1169 }

1170

1171 std::string GetSpecificHeader(const std::string& headers,	957 std::string GetSpecificHeader(const std::string& headers,

1172 const std::string& name) {	958 const std::string& name) {

1173 return GetSpecificHeaderT(headers, name);	959 // We want to grab the Value from the "Key: Value" pairs in the headers,

	960 // which should look like this (no leading spaces, \n-separated) (we format

	961 // them this way in url_request_inet.cc):

	962 // HTTP/1.1 200 OK\n

	963 // ETag: "6d0b8-947-24f35ec0"\n

	964 // Content-Length: 2375\n

	965 // Content-Type: text/html; charset=UTF-8\n

	966 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n

	967 if (headers.empty())

	968 return std::string();

	969

	970 std::string match('\n' + name + ':');

	971

	972 std::string::const_iterator begin =

	973 search(headers.begin(), headers.end(), match.begin(), match.end(),

	974 base::CaseInsensitiveCompareASCII<char>());

	975

	976 if (begin == headers.end())

	977 return std::string();

	978

	979 begin += match.length();

	980

	981 std::string ret;

	982 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL,

	983 &ret);

	984 return ret;

1174 }	985 }

1175	986

1176 bool DecodeCharset(const std::string& input,	987 bool DecodeCharset(const std::string& input,

1177 std::string* decoded_charset,	988 std::string* decoded_charset,

1178 std::string* value) {	989 std::string* value) {

1179 StringTokenizer t(input, "'");	990 StringTokenizer t(input, "'");

1180 t.set_options(StringTokenizer::RETURN_DELIMS);	991 t.set_options(StringTokenizer::RETURN_DELIMS);

1181 std::string temp_charset;	992 std::string temp_charset;

1182 std::string temp_value;	993 std::string temp_value;

1183 int numDelimsSeen = 0;	994 int numDelimsSeen = 0;

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1238 param_value = GetHeaderParamValue(header, "name",	1049 param_value = GetHeaderParamValue(header, "name",

1239 QuoteRule::REMOVE_OUTER_QUOTES);	1050 QuoteRule::REMOVE_OUTER_QUOTES);

1240 }	1051 }

1241 if (param_value.empty())	1052 if (param_value.empty())

1242 return std::string();	1053 return std::string();

1243 if (DecodeParamValue(param_value, referrer_charset, &decoded))	1054 if (DecodeParamValue(param_value, referrer_charset, &decoded))

1244 return decoded;	1055 return decoded;

1245 return std::string();	1056 return std::string();

1246 }	1057 }

1247	1058

1248 std::wstring GetHeaderParamValue(const std::wstring& field,	1059 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

1249 const std::wstring& param_name,	1060 // sure this doesn't properly handle all (most?) cases.

1250 QuoteRule::Type quote_rule) {	1061 std::string GetHeaderParamValue(const std::string& header,

1251 return GetHeaderParamValueT(field, param_name, quote_rule);	1062 const std::string& param_name,

	1063 QuoteRule::Type quote_rule) {

	1064 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".

	1065 std::string::const_iterator param_begin =

	1066 search(header.begin(), header.end(), param_name.begin(), param_name.end(),

	1067 base::CaseInsensitiveCompareASCII<char>());

	1068

	1069 if (param_begin == header.end())

	1070 return std::string();

	1071 param_begin += param_name.length();

	1072

	1073 std::string whitespace(" \t");

	1074 size_t equals_offset =

	1075 header.find_first_not_of(whitespace, param_begin - header.begin());

	1076 if (equals_offset == std::string::npos || header[equals_offset] != '=')

	1077 return std::string();

	1078

	1079 param_begin = header.begin() + equals_offset + 1;

	1080 if (param_begin == header.end())

	1081 return std::string();

	1082

	1083 std::string::const_iterator param_end;

	1084 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {

	1085 ++param_begin; // skip past the quote.

	1086 param_end = find(param_begin, header.end(), '"');

	1087 // If the closing quote is missing, we will treat the rest of the

	1088 // string as the parameter. We can't set |param_end| to the

	1089 // location of the separator (';'), since the separator is

	1090 // technically quoted. See: http://crbug.com/58840

	1091 } else {

	1092 param_end = find(param_begin + 1, header.end(), ';');

	1093 }

	1094

	1095 return std::string(param_begin, param_end);

1252 }	1096 }

1253	1097

1254 std::string GetHeaderParamValue(const std::string& field,	1098 string16 IDNToUnicode(const std::string& host,

1255 const std::string& param_name,	1099 const std::string& languages) {

1256 QuoteRule::Type quote_rule) {

1257 return GetHeaderParamValueT(field, param_name, quote_rule);

1258 }

1259

1260 // TODO(brettw) bug 734373: check the scripts for each host component and

1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

1262 // scripts that the user has installed. For now, just put the entire

1263 // path through IDN. Maybe this feature can be implemented in ICU itself?

1264 //

1265 // We may want to skip this step in the case of file URLs to allow unicode

1266 // UNC hostnames regardless of encodings.

1267 std::wstring IDNToUnicodeWithOffsets(

1268 const char* host,

1269 size_t host_len,

1270 const std::wstring& languages,

1271 std::vector<size_t>* offsets_for_adjustment) {

1272 // Convert the ASCII input to a wide string for ICU.

1273 string16 input16;

1274 input16.reserve(host_len);

1275 input16.insert(input16.end(), host, host + host_len);

1276

1277 // Do each component of the host separately, since we enforce script matching

1278 // on a per-component basis.

1279 AdjustOffset::Adjustments adjustments;

1280 string16 out16;

1281 for (size_t component_start = 0, component_end;

1282 component_start < input16.length();

1283 component_start = component_end + 1) {

1284 // Find the end of the component.

1285 component_end = input16.find('.', component_start);

1286 if (component_end == string16::npos)

1287 component_end = input16.length(); // For getting the last component.

1288 size_t component_length = component_end - component_start;

1289 size_t new_component_start = out16.length();

1290 bool converted_idn = false;

1291 if (component_end > component_start) {

1292 // Add the substring that we just found.

1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,

1294 component_length, languages, &out16);

1295 }

1296 size_t new_component_length = out16.length() - new_component_start;

1297

1298 if (converted_idn && offsets_for_adjustment) {

1299 adjustments.push_back(AdjustOffset::Adjustment(

1300 component_start, component_length, new_component_length));

1301 }

1302

1303 // Need to add the dot we just found (if we found one).

1304 if (component_end < input16.length())

1305 out16.push_back('.');

1306 }

1307

1308 // Make offset adjustment.

1309 if (offsets_for_adjustment && !adjustments.empty()) {

1310 std::for_each(offsets_for_adjustment->begin(),

1311 offsets_for_adjustment->end(),

1312 AdjustOffset(adjustments));

1313 }

1314

1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);

1316 }

1317

1318 std::wstring IDNToUnicode(const char* host,

1319 size_t host_len,

1320 const std::wstring& languages,

1321 size_t* offset_for_adjustment) {

1322 std::vector<size_t> offsets;	1100 std::vector<size_t> offsets;

1323 if (offset_for_adjustment)	1101 return IDNToUnicodeWithOffsets(host, languages, &offsets);

1324 offsets.push_back(*offset_for_adjustment);

1325 std::wstring result =

1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);

1327 if (offset_for_adjustment)

1328 *offset_for_adjustment = offsets[0];

1329 return result;

1330 }	1102 }

1331	1103

1332 std::string CanonicalizeHost(const std::string& host,	1104 std::string CanonicalizeHost(const std::string& host,

1333 url_canon::CanonHostInfo* host_info) {	1105 url_canon::CanonHostInfo* host_info) {

1334 // Try to canonicalize the host.	1106 // Try to canonicalize the host.

1335 const url_parse::Component raw_host_component(	1107 const url_parse::Component raw_host_component(

1336 0, static_cast<int>(host.length()));	1108 0, static_cast<int>(host.length()));

1337 std::string canon_host;	1109 std::string canon_host;

1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host);	1110 url_canon::StdStringCanonOutput canon_host_output(&canon_host);

1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,	1111 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,

1340 &canon_host_output, host_info);	1112 &canon_host_output, host_info);

1341	1113

1342 if (host_info->out_host.is_nonempty() &&	1114 if (host_info->out_host.is_nonempty() &&

1343 host_info->family != url_canon::CanonHostInfo::BROKEN) {	1115 host_info->family != url_canon::CanonHostInfo::BROKEN) {

1344 // Success! Assert that there's no extra garbage.	1116 // Success! Assert that there's no extra garbage.

1345 canon_host_output.Complete();	1117 canon_host_output.Complete();

1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));	1118 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));

1347 } else {	1119 } else {

1348 // Empty host, or canonicalization failed. We'll return empty.	1120 // Empty host, or canonicalization failed. We'll return empty.

1349 canon_host.clear();	1121 canon_host.clear();

1350 }	1122 }

1351	1123

1352 return canon_host;	1124 return canon_host;

1353 }	1125 }

1354	1126

1355 std::string CanonicalizeHost(const std::wstring& host,

1356 url_canon::CanonHostInfo* host_info) {

1357 std::string converted_host;

1358 WideToUTF8(host.c_str(), host.length(), &converted_host);

1359 return CanonicalizeHost(converted_host, host_info);

1360 }

1361

1362 std::string GetDirectoryListingHeader(const string16& title) {	1127 std::string GetDirectoryListingHeader(const string16& title) {

1363 static const base::StringPiece header(	1128 static const base::StringPiece header(

1364 NetModule::GetResource(IDR_DIR_HEADER_HTML));	1129 NetModule::GetResource(IDR_DIR_HEADER_HTML));

1365 // This can be null in unit tests.	1130 // This can be null in unit tests.

1366 DLOG_IF(WARNING, header.empty()) <<	1131 DLOG_IF(WARNING, header.empty()) <<

1367 "Missing resource: directory listing header";	1132 "Missing resource: directory listing header";

1368	1133

1369 std::string result;	1134 std::string result;

1370 if (!header.empty())	1135 if (!header.empty())

1371 result.assign(header.data(), header.size());	1136 result.assign(header.data(), header.size());

(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1732 UnescapeRule::Type flags =	1497 UnescapeRule::Type flags =

1733 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS;	1498 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS;

1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);	1499 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);

1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);	1500 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);

1736 }	1501 }

1737	1502

1738 std::string GetHostOrSpecFromURL(const GURL& url) {	1503 std::string GetHostOrSpecFromURL(const GURL& url) {

1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();	1504 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();

1740 }	1505 }

1741	1506

1742 void AppendFormattedHostWithOffsets(	1507 void AppendFormattedHost(const GURL& url,

1743 const GURL& url,	1508 const std::string& languages,

1744 const std::wstring& languages,	1509 string16* output) {

1745 std::wstring* output,	1510 std::vector<size_t> offsets;

1746 url_parse::Parsed* new_parsed,	1511 AppendFormattedComponent(url.possibly_invalid_spec(),

1747 std::vector<size_t>* offsets_for_adjustment) {	1512 url.parsed_for_possibly_invalid_spec().host, offsets,

1748 DCHECK(output);	1513 HostComponentTransform(languages), output, NULL, NULL);

1749 const url_parse::Component& host =

1750 url.parsed_for_possibly_invalid_spec().host;

1751

1752 if (host.is_nonempty()) {

1753 // Handle possible IDN in the host name.

1754 size_t host_begin = output->length();

1755 if (new_parsed)

1756 new_parsed->host.begin = static_cast<int>(host_begin);

1757 size_t old_host_len = static_cast<size_t>(host.len);

1758

1759 // Compose a list of offsets within the host area.

1760 std::vector<size_t> offsets_into_host =

1761 OffsetsIntoSection(offsets_for_adjustment, host_begin);

1762

1763 const std::string& spec = url.possibly_invalid_spec();

1764 DCHECK(host.begin >= 0 &&

1765 ((spec.length() == 0 && host.begin == 0) ||

1766 host.begin < static_cast<int>(spec.length())));

1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,

1768 languages, &offsets_into_host));

1769

1770 size_t new_host_len = output->length() - host_begin;

1771 if (new_parsed)

1772 new_parsed->host.len = static_cast<int>(new_host_len);

1773

1774 // Apply offset adjustments.

1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,

1776 old_host_len, new_host_len, host_begin);

1777 } else if (new_parsed) {

1778 new_parsed->host.reset();

1779 }

1780 }	1514 }

1781	1515

1782 void AppendFormattedHost(const GURL& url,

1783 const std::wstring& languages,

1784 std::wstring* output,

1785 url_parse::Parsed* new_parsed,

1786 size_t* offset_for_adjustment) {

1787 std::vector<size_t> offsets;

1788 if (offset_for_adjustment)

1789 offsets.push_back(*offset_for_adjustment);

1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);

1791 if (offset_for_adjustment)

1792 *offset_for_adjustment = offsets[0];

1793 }

1794

1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|.

1796 string16 FormatUrlWithOffsets(const GURL& url,	1516 string16 FormatUrlWithOffsets(const GURL& url,

1797 const std::string& languages,	1517 const std::string& languages,

1798 FormatUrlTypes format_types,	1518 FormatUrlTypes format_types,

1799 UnescapeRule::Type unescape_rules,	1519 UnescapeRule::Type unescape_rules,

1800 url_parse::Parsed* new_parsed,	1520 url_parse::Parsed* new_parsed,

1801 size_t* prefix_end,	1521 size_t* prefix_end,

1802 std::vector<size_t>* offsets_for_adjustment) {	1522 std::vector<size_t>* offsets_for_adjustment) {

1803 return WideToUTF16Hack(	1523 url_parse::Parsed parsed_temp;

1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types,	1524 if (!new_parsed)

1805 unescape_rules, new_parsed, prefix_end,	1525 new_parsed = &parsed_temp;

1806 offsets_for_adjustment));	1526 else

	1527 *new_parsed = url_parse::Parsed();

	1528 std::vector<size_t> original_offsets;

	1529 if (offsets_for_adjustment)

	1530 original_offsets = *offsets_for_adjustment;

	1531

	1532 // Special handling for view-source:. Don't use chrome::kViewSourceScheme

	1533 // because this library shouldn't depend on chrome.

	1534 const char* const kViewSource = "view-source";

	1535 // Reject "view-source:view-source:..." to avoid deep recursion.

	1536 const char* const kViewSourceTwice = "view-source:view-source:";

	1537 if (url.SchemeIs(kViewSource) &&

	1538 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

	1539 return FormatViewSourceUrl(url, original_offsets, languages, format_types,

	1540 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

	1541 }

	1542

	1543 // We handle both valid and invalid URLs (this will give us the spec

	1544 // regardless of validity).

	1545 const std::string& spec = url.possibly_invalid_spec();

	1546 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

	1547

	1548 // Scheme & separators. These are ASCII.

	1549 string16 url_string;

	1550 url_string.insert(url_string.end(), spec.begin(),

	1551 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,

	1552 true));

	1553 const char kHTTP[] = "http://";

	1554 const char kFTP[] = "ftp.";

	1555 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This

	1556 // means that if we trim "http://" off a URL whose host starts with "ftp." and

	1557 // the user inputs this into any field subject to fixup (which is basically

	1558 // all input fields), the meaning would be changed. (In fact, often the

	1559 // formatted URL is directly pre-filled into an input field.) For this reason

	1560 // we avoid stripping "http://" in this case.

	1561 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&

	1562 EqualsASCII(url_string, kHTTP) &&

	1563 !StartsWithASCII(url.host(), kFTP, true);

	1564 new_parsed->scheme = parsed.scheme;

	1565

	1566 // Username & password.

	1567 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {

	1568 // Remove the username and password fields. We don't want to display those

	1569 // to the user since they can be used for attacks,

	1570 // e.g. "http://google.com:search@evil.ru/"

	1571 new_parsed->username.reset();

	1572 new_parsed->password.reset();

	1573 // Update the offsets based on removed username and/or password.

	1574 if (offsets_for_adjustment && !offsets_for_adjustment->empty() &&

	1575 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {

	1576 OffsetAdjuster offset_adjuster(offsets_for_adjustment);

	1577 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

	1578 // The seeming off-by-one and off-by-two in these first two lines are to

	1579 // account for the ':' after the username and '@' after the password.

	1580 offset_adjuster.Add(OffsetAdjuster::Adjustment(

	1581 static_cast<size_t>(parsed.username.begin),

	1582 static_cast<size_t>(parsed.username.len + parsed.password.len + 2),

	1583 0));

	1584 } else {

	1585 const url_parse::Component* nonempty_component =

	1586 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

	1587 // The seeming off-by-one in below is to account for the '@' after the

	1588 // username/password.

	1589 offset_adjuster.Add(OffsetAdjuster::Adjustment(

	1590 static_cast<size_t>(nonempty_component->begin),

	1591 static_cast<size_t>(nonempty_component->len + 1), 0));

	1592 }

	1593 }

	1594 } else {

	1595 AppendFormattedComponent(spec, parsed.username, original_offsets,

	1596 NonHostComponentTransform(unescape_rules), &url_string,

	1597 &new_parsed->username, offsets_for_adjustment);

	1598 if (parsed.password.is_valid()) {

	1599 size_t colon = parsed.username.end();

	1600 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon);

	1601 std::vector<size_t>::const_iterator colon_iter =

	1602 std::find(original_offsets.begin(), original_offsets.end(), colon);

	1603 if (colon_iter != original_offsets.end()) {

	1604 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] =

	1605 url_string.length();

	1606 }

	1607 url_string.push_back(':');

	1608 }

	1609 AppendFormattedComponent(spec, parsed.password, original_offsets,

	1610 NonHostComponentTransform(unescape_rules), &url_string,

	1611 &new_parsed->password, offsets_for_adjustment);

	1612 if (parsed.username.is_valid() || parsed.password.is_valid()) {

	1613 size_t at_sign = (parsed.password.is_valid() ?

	1614 parsed.password : parsed.username).end();

	1615 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign);

	1616 std::vector<size_t>::const_iterator at_sign_iter =

	1617 std::find(original_offsets.begin(), original_offsets.end(), at_sign);

	1618 if (at_sign_iter != original_offsets.end()) {

	1619 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] =

	1620 url_string.length();

	1621 }

	1622 url_string.push_back('@');

	1623 }

	1624 }

	1625 if (prefix_end)

	1626 *prefix_end = static_cast<size_t>(url_string.length());

	1627

	1628 // Host.

	1629 AppendFormattedComponent(spec, parsed.host, original_offsets,

	1630 HostComponentTransform(languages), &url_string, &new_parsed->host,

	1631 offsets_for_adjustment);

	1632

	1633 // Port.

	1634 if (parsed.port.is_nonempty()) {

	1635 url_string.push_back(':');

	1636 new_parsed->port.begin = url_string.length();

	1637 url_string.insert(url_string.end(),

	1638 spec.begin() + parsed.port.begin,

	1639 spec.begin() + parsed.port.end());

	1640 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

	1641 } else {

	1642 new_parsed->port.reset();

	1643 }

	1644

	1645 // Path & query. Both get the same general unescape & convert treatment.

	1646 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||

	1647 !CanStripTrailingSlash(url)) {

	1648 AppendFormattedComponent(spec, parsed.path, original_offsets,

	1649 NonHostComponentTransform(unescape_rules), &url_string,

	1650 &new_parsed->path, offsets_for_adjustment);

	1651 }

	1652 if (parsed.query.is_valid())

	1653 url_string.push_back('?');

	1654 AppendFormattedComponent(spec, parsed.query, original_offsets,

	1655 NonHostComponentTransform(unescape_rules), &url_string,

	1656 &new_parsed->query, offsets_for_adjustment);

	1657

	1658 // Ref. This is valid, unescaped UTF-8, so we can just convert.

	1659 if (parsed.ref.is_valid()) {

	1660 url_string.push_back('#');

	1661 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin);

	1662 size_t output_ref_begin = url_string.length();

	1663 new_parsed->ref.begin = static_cast<int>(output_ref_begin);

	1664

	1665 std::vector<size_t> offsets_into_ref(

	1666 OffsetsIntoComponent(original_offsets, original_ref_begin));

	1667 if (parsed.ref.len > 0) {

	1668 url_string.append(UTF8ToUTF16AndAdjustOffsets(

	1669 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)),

	1670 &offsets_into_ref));

	1671 }

	1672

	1673 new_parsed->ref.len =

	1674 static_cast<int>(url_string.length() - new_parsed->ref.begin);

	1675 AdjustForComponentTransform(original_offsets, original_ref_begin,

	1676 static_cast<size_t>(parsed.ref.end()), offsets_into_ref,

	1677 output_ref_begin, offsets_for_adjustment);

	1678 }

	1679

	1680 // If we need to strip out http do it after the fact. This way we don't need

	1681 // to worry about how offsets_for_adjustment is interpreted.

	1682 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) {

	1683 const size_t kHTTPSize = arraysize(kHTTP) - 1;

	1684 url_string = url_string.substr(kHTTPSize);

	1685 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) {

	1686 OffsetAdjuster offset_adjuster(offsets_for_adjustment);

	1687 offset_adjuster.Add(OffsetAdjuster::Adjustment(0, kHTTPSize, 0));

	1688 }

	1689 if (prefix_end)

	1690 *prefix_end -= kHTTPSize;

	1691

	1692 // Adjust new_parsed.

	1693 DCHECK(new_parsed->scheme.is_valid());

	1694 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.

	1695 new_parsed->scheme.reset();

	1696 AdjustComponents(delta, new_parsed);

	1697 }

	1698

	1699 LimitOffsets(url_string, offsets_for_adjustment);

	1700 return url_string;

1807 }	1701 }

1808	1702

// Single-offset overload of URL formatting. The optional offset pointed to by
// offset_for_adjustment is copied into a local vector, the vector-based
// formatter is invoked with that vector, and the first vector element is then
// written back through offset_for_adjustment.
// In the OLD column the formatter is FormatUrlInternal (taking
// ASCIIToWide(languages), with its result passed through WideToUTF16Hack); in
// the NEW column it is FormatUrlWithOffsets called with languages directly.
// Returns the string produced by that formatter.
1809 string16 FormatUrl(const GURL& url,	1703 string16 FormatUrl(const GURL& url,

1810 const std::string& languages,	1704 const std::string& languages,

1811 FormatUrlTypes format_types,	1705 FormatUrlTypes format_types,

1812 UnescapeRule::Type unescape_rules,	1706 UnescapeRule::Type unescape_rules,

1813 url_parse::Parsed* new_parsed,	1707 url_parse::Parsed* new_parsed,

1814 size_t* prefix_end,	1708 size_t* prefix_end,

1815 size_t* offset_for_adjustment) {	1709 size_t* offset_for_adjustment) {

// The vector stays empty when the caller passed no offset; otherwise it holds
// exactly the one caller-supplied offset.
1816 std::vector<size_t> offsets;	1710 std::vector<size_t> offsets;

1817 if (offset_for_adjustment)	1711 if (offset_for_adjustment)

1818 offsets.push_back(*offset_for_adjustment);	1712 offsets.push_back(*offset_for_adjustment);

1819 string16 result = WideToUTF16Hack(	1713 string16 result = FormatUrlWithOffsets(url, languages, format_types,

1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types,	1714 unescape_rules, new_parsed, prefix_end, &offsets);

1821 unescape_rules, new_parsed, prefix_end, &offsets));

// Copy the (possibly updated) offset back to the caller.
// NOTE(review): reading offsets[0] assumes the callee leaves the vector's
// size unchanged -- confirm against the callee.
1822 if (offset_for_adjustment)	1715 if (offset_for_adjustment)

1823 *offset_for_adjustment = offsets[0];	1716 *offset_for_adjustment = offsets[0];

1824 return result;	1717 return result;

1825 }	1718 }

1826	1719

1827 bool CanStripTrailingSlash(const GURL& url) {	1720 bool CanStripTrailingSlash(const GURL& url) {

1828 // Omit the path only for standard, non-file URLs with nothing but "/" after	1721 // Omit the path only for standard, non-file URLs with nothing but "/" after

1829 // the hostname.	1722 // the hostname.

1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&	1723 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&

1831 !url.has_ref() && url.path() == "/";	1724 !url.has_ref() && url.path() == "/";

(...skipping 435 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2267 }	2160 }

2268	2161

// Constructs a NetworkInterface by copy-initializing its name and address
// members from the identically named constructor arguments. The body is empty.
2269 NetworkInterface::NetworkInterface(const std::string& name,	2162 NetworkInterface::NetworkInterface(const std::string& name,

2270 const IPAddressNumber& address)	2163 const IPAddressNumber& address)

2271 : name(name), address(address) {	2164 : name(name), address(address) {

2272 }	2165 }

2273	2166

// Out-of-line destructor with an empty body; it performs no explicit work.
2274 NetworkInterface::~NetworkInterface() {	2167 NetworkInterface::~NetworkInterface() {

2275 }	2168 }

2276	2169

// Stores the given component_start in the member of the same name; that
// member is the threshold later compared against in operator().
// This definition carries only an OLD-column line number in this diff.
2277 ClampComponentOffset::ClampComponentOffset(size_t component_start)

2278 : component_start(component_start) {}

2279

// Returns offset unchanged when it is greater than or equal to
// component_start; otherwise returns std::wstring::npos.
// This definition carries only an OLD-column line number in this diff.
2280 size_t ClampComponentOffset::operator()(size_t offset) {

2281 return (offset >= component_start) ?

2282 offset : std::wstring::npos;

2283 }

2284

2285 } // namespace net	2170 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | net/base/net_util_unittest.cc » ('J')