url/url_canon_stdurl.cc - Issue 13821004: Move googleurl into the Chrome repo.

Side by Side Diff: url/url_canon_stdurl.cc

Issue 13821004: Move googleurl into the Chrome repo. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright 2007, Google Inc.

	2 // All rights reserved.

	3 //

	4 // Redistribution and use in source and binary forms, with or without

	5 // modification, are permitted provided that the following conditions are

	6 // met:

	7 //

	8 // * Redistributions of source code must retain the above copyright

	9 // notice, this list of conditions and the following disclaimer.

	10 // * Redistributions in binary form must reproduce the above

	11 // copyright notice, this list of conditions and the following disclaimer

	12 // in the documentation and/or other materials provided with the

	13 // distribution.

	14 // * Neither the name of Google Inc. nor the names of its

	15 // contributors may be used to endorse or promote products derived from

	16 // this software without specific prior written permission.

	17 //

	18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29

	30 // Functions to canonicalize "standard" URLs, which are ones that have an

	31 // authority section including a host name.

	32

	33 #include "googleurl/src/url_canon.h"

	34 #include "googleurl/src/url_canon_internal.h"

	35

	36 namespace url_canon {

	37

	38 namespace {

	39

	40 template<typename CHAR, typename UCHAR>

	41 bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,

	42 const url_parse::Parsed& parsed,

	43 CharsetConverter* query_converter,

	44 CanonOutput* output,

	45 url_parse::Parsed* new_parsed) {

	46 // Scheme: this will append the colon.

	47 bool success = CanonicalizeScheme(source.scheme, parsed.scheme,

	48 output, &new_parsed->scheme);

	49

	50 // Authority (username, password, host, port)

	51 bool have_authority;

	52 if (parsed.username.is_valid() \|\| parsed.password.is_valid() \|\|

	53 parsed.host.is_nonempty() \|\| parsed.port.is_valid()) {

	54 have_authority = true;

	55

	56 // Only write the authority separators when we have a scheme.

	57 if (parsed.scheme.is_valid()) {

	58 output->push_back('/');

	59 output->push_back('/');

	60 }

	61

	62 // User info: the canonicalizer will handle the : and @.

	63 success &= CanonicalizeUserInfo(source.username, parsed.username,

	64 source.password, parsed.password,

	65 output,

	66 &new_parsed->username,

	67 &new_parsed->password);

	68

	69 success &= CanonicalizeHost(source.host, parsed.host,

	70 output, &new_parsed->host);

	71

	72 // Host must not be empty for standard URLs.

	73 if (!parsed.host.is_nonempty())

	74 success = false;

	75

	76 // Port: the port canonicalizer will handle the colon.

	77 int default_port = DefaultPortForScheme(

	78 &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);

	79 success &= CanonicalizePort(source.port, parsed.port, default_port,

	80 output, &new_parsed->port);

	81 } else {

	82 // No authority, clear the components.

	83 have_authority = false;

	84 new_parsed->host.reset();

	85 new_parsed->username.reset();

	86 new_parsed->password.reset();

	87 new_parsed->port.reset();

	88 success = false; // Standard URLs must have an authority.

	89 }

	90

	91 // Path

	92 if (parsed.path.is_valid()) {

	93 success &= CanonicalizePath(source.path, parsed.path,

	94 output, &new_parsed->path);

	95 } else if (have_authority \|\|

	96 parsed.query.is_valid() \|\| parsed.ref.is_valid()) {

	97 // When we have an empty path, make up a path when we have an authority

	98 // or something following the path. The only time we allow an empty

	99 // output path is when there is nothing else.

	100 new_parsed->path = url_parse::Component(output->length(), 1);

	101 output->push_back('/');

	102 } else {

	103 // No path at all

	104 new_parsed->path.reset();

	105 }

	106

	107 // Query

	108 CanonicalizeQuery(source.query, parsed.query, query_converter,

	109 output, &new_parsed->query);

	110

	111 // Ref: ignore failure for this, since the page can probably still be loaded.

	112 CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);

	113

	114 return success;

	115 }

	116

	117 } // namespace

	118

	119

	120 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED

	121 // if the scheme is unknown.

	122 int DefaultPortForScheme(const char* scheme, int scheme_len) {

	123 int default_port = url_parse::PORT_UNSPECIFIED;

	124 switch (scheme_len) {

	125 case 4:

	126 if (!strncmp(scheme, "http", scheme_len))

	127 default_port = 80;

	128 break;

	129 case 5:

	130 if (!strncmp(scheme, "https", scheme_len))

	131 default_port = 443;

	132 break;

	133 case 3:

	134 if (!strncmp(scheme, "ftp", scheme_len))

	135 default_port = 21;

	136 else if (!strncmp(scheme, "wss", scheme_len))

	137 default_port = 443;

	138 break;

	139 case 6:

	140 if (!strncmp(scheme, "gopher", scheme_len))

	141 default_port = 70;

	142 break;

	143 case 2:

	144 if (!strncmp(scheme, "ws", scheme_len))

	145 default_port = 80;

	146 break;

	147 }

	148 return default_port;

	149 }

	150

	151 bool CanonicalizeStandardURL(const char* spec,

	152 int spec_len,

	153 const url_parse::Parsed& parsed,

	154 CharsetConverter* query_converter,

	155 CanonOutput* output,

	156 url_parse::Parsed* new_parsed) {

	157 return DoCanonicalizeStandardURL<char, unsigned char>(

	158 URLComponentSource<char>(spec), parsed, query_converter,

	159 output, new_parsed);

	160 }

	161

	162 bool CanonicalizeStandardURL(const char16* spec,

	163 int spec_len,

	164 const url_parse::Parsed& parsed,

	165 CharsetConverter* query_converter,

	166 CanonOutput* output,

	167 url_parse::Parsed* new_parsed) {

	168 return DoCanonicalizeStandardURL<char16, char16>(

	169 URLComponentSource<char16>(spec), parsed, query_converter,

	170 output, new_parsed);

	171 }

	172

	173 // It might be nice in the future to optimize this so unchanged components don't

	174 // need to be recanonicalized. This is especially true since the common case for

	175 // ReplaceComponents is removing things we don't want, like reference fragments

	176 // and usernames. These cases can become more efficient if we can assume the

	177 // rest of the URL is OK with these removed (or only the modified parts

	178 // recanonicalized). This would be much more complex to implement, however.

	179 //

	180 // You would also need to update DoReplaceComponents in url_util.cc which

	181 // relies on this re-checking everything (see the comment there for why).

	182 bool ReplaceStandardURL(const char* base,

	183 const url_parse::Parsed& base_parsed,

	184 const Replacements<char>& replacements,

	185 CharsetConverter* query_converter,

	186 CanonOutput* output,

	187 url_parse::Parsed* new_parsed) {

	188 URLComponentSource<char> source(base);

	189 url_parse::Parsed parsed(base_parsed);

	190 SetupOverrideComponents(base, replacements, &source, &parsed);

	191 return DoCanonicalizeStandardURL<char, unsigned char>(

	192 source, parsed, query_converter, output, new_parsed);

	193 }

	194

	195 // For 16-bit replacements, we turn all the replacements into UTF-8 so the

	196 // regular codepath can be used.

	197 bool ReplaceStandardURL(const char* base,

	198 const url_parse::Parsed& base_parsed,

	199 const Replacements<char16>& replacements,

	200 CharsetConverter* query_converter,

	201 CanonOutput* output,

	202 url_parse::Parsed* new_parsed) {

	203 RawCanonOutput<1024> utf8;

	204 URLComponentSource<char> source(base);

	205 url_parse::Parsed parsed(base_parsed);

	206 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);

	207 return DoCanonicalizeStandardURL<char, unsigned char>(

	208 source, parsed, query_converter, output, new_parsed);

	209 }

	210

	211 } // namespace url_canon

OLD	NEW

« no previous file with comments | « url/url_canon_stdstring.h ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »