url/url_parse.cc - Issue 13821004: Move googleurl into the Chrome repo.

Side by Side Diff: url/url_parse.cc

Issue 13821004: Move googleurl into the Chrome repo. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /* Based on nsURLParsers.cc from Mozilla

	2 * -------------------------------------

	3 * The contents of this file are subject to the Mozilla Public License Version

	4 * 1.1 (the "License"); you may not use this file except in compliance with

	5 * the License. You may obtain a copy of the License at

	6 * http://www.mozilla.org/MPL/

	7 *

	8 * Software distributed under the License is distributed on an "AS IS" basis,

	9 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

	10 * for the specific language governing rights and limitations under the

	11 * License.

	12 *

	13 * The Original Code is mozilla.org code.

	14 *

	15 * The Initial Developer of the Original Code is

	16 * Netscape Communications Corporation.

	17 * Portions created by the Initial Developer are Copyright (C) 1998

	18 * the Initial Developer. All Rights Reserved.

	19 *

	20 * Contributor(s):

	21 * Darin Fisher (original author)

	22 *

	23 * Alternatively, the contents of this file may be used under the terms of

	24 * either the GNU General Public License Version 2 or later (the "GPL"), or

	25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

	26 * in which case the provisions of the GPL or the LGPL are applicable instead

	27 * of those above. If you wish to allow use of your version of this file only

	28 * under the terms of either the GPL or the LGPL, and not to allow others to

	29 * use your version of this file under the terms of the MPL, indicate your

	30 * decision by deleting the provisions above and replace them with the notice

	31 * and other provisions required by the GPL or the LGPL. If you do not delete

	32 * the provisions above, a recipient may use your version of this file under

	33 * the terms of any one of the MPL, the GPL or the LGPL.

	34 *

	35 * *** END LICENSE BLOCK *** */

	36

	37 #include "googleurl/src/url_parse.h"

	38

	39 #include <stdlib.h>

	40

	41 #include "base/logging.h"

	42 #include "googleurl/src/url_parse_internal.h"

	43 #include "googleurl/src/url_util.h"

	44 #include "googleurl/src/url_util_internal.h"

	45

	46 namespace url_parse {

	47

	48 namespace {

	49

	50 // Returns true if the given character is a valid digit to use in a port.

	51 inline bool IsPortDigit(char16 ch) {

	52 return ch >= '0' && ch <= '9';

	53 }

	54

	// Scans |spec| forward from |start_offset| and returns the index of the first
	// character for which IsAuthorityTerminator() is true. If no such character
	// is seen before |spec_len|, |spec_len| itself is returned.
	template<typename CHAR>
	int FindNextAuthorityTerminator(const CHAR* spec,
	                                int start_offset,
	                                int spec_len) {
	  int pos = start_offset;
	  while (pos < spec_len) {
	    if (IsAuthorityTerminator(spec[pos]))
	      return pos;
	    ++pos;
	  }
	  return spec_len;  // Ran off the end without meeting a terminator.
	}

	68

	69 template<typename CHAR>

	70 void ParseUserInfo(const CHAR* spec,

	71 const Component& user,

	72 Component* username,

	73 Component* password) {

	74 // Find the first colon in the user section, which separates the username and

	75 // password.

	76 int colon_offset = 0;

	77 while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')

	78 colon_offset++;

	79

	80 if (colon_offset < user.len) {

	81 // Found separator: <username>:<password>

	82 *username = Component(user.begin, colon_offset);

	83 *password = MakeRange(user.begin + colon_offset + 1,

	84 user.begin + user.len);

	85 } else {

	86 // No separator, treat everything as the username

	87 *username = user;

	88 *password = Component();

	89 }

	90 }

	91

	92 template<typename CHAR>

	93 void ParseServerInfo(const CHAR* spec,

	94 const Component& serverinfo,

	95 Component* hostname,

	96 Component* port_num) {

	97 if (serverinfo.len == 0) {

	98 // No server info, host name is empty.

	99 hostname->reset();

	100 port_num->reset();

	101 return;

	102 }

	103

	104 // If the host starts with a left-bracket, assume the entire host is an

	105 // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal.

	106 // This assumption will be overridden if we find a right-bracket.

	107 //

	108 // Our IPv6 address canonicalization code requires both brackets to exist,

	109 // but the ability to locate an incomplete address can still be useful.

	110 int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;

	111 int colon = -1;

	112

	113 // Find the last right-bracket, and the last colon.

	114 for (int i = serverinfo.begin; i < serverinfo.end(); i++) {

	115 switch (spec[i]) {

	116 case ']':

	117 ipv6_terminator = i;

	118 break;

	119 case ':':

	120 colon = i;

	121 break;

	122 }

	123 }

	124

	125 if (colon > ipv6_terminator) {

	126 // Found a port number: <hostname>:<port>

	127 *hostname = MakeRange(serverinfo.begin, colon);

	128 if (hostname->len == 0)

	129 hostname->reset();

	130 *port_num = MakeRange(colon + 1, serverinfo.end());

	131 } else {

	132 // No port: <hostname>

	133 *hostname = serverinfo;

	134 port_num->reset();

	135 }

	136 }

	137

	138 // Given an already-identified auth section, breaks it into its consituent

	139 // parts. The port number will be parsed and the resulting integer will be

	140 // filled into the given *port variable, or -1 if there is no port number or it

	141 // is invalid.

	142 template<typename CHAR>

	143 void DoParseAuthority(const CHAR* spec,

	144 const Component& auth,

	145 Component* username,

	146 Component* password,

	147 Component* hostname,

	148 Component* port_num) {

	149 DCHECK(auth.is_valid()) << "We should always get an authority";

	150 if (auth.len == 0) {

	151 username->reset();

	152 password->reset();

	153 hostname->reset();

	154 port_num->reset();

	155 return;

	156 }

	157

	158 // Search backwards for @, which is the separator between the user info and

	159 // the server info.

	160 int i = auth.begin + auth.len - 1;

	161 while (i > auth.begin && spec[i] != '@')

	162 i--;

	163

	164 if (spec[i] == '@') {

	165 // Found user info: <user-info>@<server-info>

	166 ParseUserInfo(spec, Component(auth.begin, i - auth.begin),

	167 username, password);

	168 ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),

	169 hostname, port_num);

	170 } else {

	171 // No user info, everything is server info.

	172 username->reset();

	173 password->reset();

	174 ParseServerInfo(spec, auth, hostname, port_num);

	175 }

	176 }

	177

	178 template<typename CHAR>

	179 void ParsePath(const CHAR* spec,

	180 const Component& path,

	181 Component* filepath,

	182 Component* query,

	183 Component* ref) {

	184 // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>

	185

	186 // Special case when there is no path.

	187 if (path.len == -1) {

	188 filepath->reset();

	189 query->reset();

	190 ref->reset();

	191 return;

	192 }

	193 DCHECK(path.len > 0) << "We should never have 0 length paths";

	194

	195 // Search for first occurrence of either ? or #.

	196 int path_end = path.begin + path.len;

	197

	198 int query_separator = -1; // Index of the '?'

	199 int ref_separator = -1; // Index of the '#'

	200 for (int i = path.begin; i < path_end; i++) {

	201 switch (spec[i]) {

	202 case '?':

	203 // Only match the query string if it precedes the reference fragment

	204 // and when we haven't found one already.

	205 if (ref_separator < 0 && query_separator < 0)

	206 query_separator = i;

	207 break;

	208 case '#':

	209 // Record the first # sign only.

	210 if (ref_separator < 0)

	211 ref_separator = i;

	212 break;

	213 }

	214 }

	215

	216 // Markers pointing to the character after each of these corresponding

	217 // components. The code below words from the end back to the beginning,

	218 // and will update these indices as it finds components that exist.

	219 int file_end, query_end;

	220

	221 // Ref fragment: from the # to the end of the path.

	222 if (ref_separator >= 0) {

	223 file_end = query_end = ref_separator;

	224 *ref = MakeRange(ref_separator + 1, path_end);

	225 } else {

	226 file_end = query_end = path_end;

	227 ref->reset();

	228 }

	229

	230 // Query fragment: everything from the ? to the next boundary (either the end

	231 // of the path or the ref fragment).

	232 if (query_separator >= 0) {

	233 file_end = query_separator;

	234 *query = MakeRange(query_separator + 1, query_end);

	235 } else {

	236 query->reset();

	237 }

	238

	239 // File path: treat an empty file path as no file path.

	240 if (file_end != path.begin)

	241 *filepath = MakeRange(path.begin, file_end);

	242 else

	243 filepath->reset();

	244 }

	245

	246 template<typename CHAR>

	247 bool DoExtractScheme(const CHAR* url,

	248 int url_len,

	249 Component* scheme) {

	250 // Skip leading whitespace and control characters.

	251 int begin = 0;

	252 while (begin < url_len && ShouldTrimFromURL(url[begin]))

	253 begin++;

	254 if (begin == url_len)

	255 return false; // Input is empty or all whitespace.

	256

	257 // Find the first colon character.

	258 for (int i = begin; i < url_len; i++) {

	259 if (url[i] == ':') {

	260 *scheme = MakeRange(begin, i);

	261 return true;

	262 }

	263 }

	264 return false; // No colon found: no scheme

	265 }

	266

	267 // Fills in all members of the Parsed structure except for the scheme.

	268 //

	269 // \|spec\| is the full spec being parsed, of length \|spec_len\|.

	270 // \|after_scheme\| is the character immediately following the scheme (after the

	271 // colon) where we'll begin parsing.

	272 //

	273 // Compatability data points. I list "host", "path" extracted:

	274 // Input IE6 Firefox Us

	275 // ----- -------------- -------------- --------------

	276 // http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

	277 // http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

	278 // http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/"

	279 // http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/"

	280 // http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

	281 //

	282 // (*) Interestingly, although IE fails to load these URLs, its history

	283 // canonicalizer handles them, meaning if you've been to the corresponding

	284 // "http://foo.com/" link, it will be colored.

	285 template <typename CHAR>

	286 void DoParseAfterScheme(const CHAR* spec,

	287 int spec_len,

	288 int after_scheme,

	289 Parsed* parsed) {

	290 int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);

	291 int after_slashes = after_scheme + num_slashes;

	292

	293 // First split into two main parts, the authority (username, password, host,

	294 // and port) and the full path (path, query, and reference).

	295 Component authority;

	296 Component full_path;

	297

	298 // Found "//<some data>", looks like an authority section. Treat everything

	299 // from there to the next slash (or end of spec) to be the authority. Note

	300 // that we ignore the number of slashes and treat it as the authority.

	301 int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);

	302 authority = Component(after_slashes, end_auth - after_slashes);

	303

	304 if (end_auth == spec_len) // No beginning of path found.

	305 full_path = Component();

	306 else // Everything starting from the slash to the end is the path.

	307 full_path = Component(end_auth, spec_len - end_auth);

	308

	309 // Now parse those two sub-parts.

	310 DoParseAuthority(spec, authority, &parsed->username, &parsed->password,

	311 &parsed->host, &parsed->port);

	312 ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);

	313 }

	314

	315 // The main parsing function for standard URLs. Standard URLs have a scheme,

	316 // host, path, etc.

	317 template<typename CHAR>

	318 void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {

	319 DCHECK(spec_len >= 0);

	320

	321 // Strip leading & trailing spaces and control characters.

	322 int begin = 0;

	323 TrimURL(spec, &begin, &spec_len);

	324

	325 int after_scheme;

	326 if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {

	327 after_scheme = parsed->scheme.end() + 1; // Skip past the colon.

	328 } else {

	329 // Say there's no scheme when there is no colon. We could also say that

	330 // everything is the scheme. Both would produce an invalid URL, but this way

	331 // seems less wrong in more cases.

	332 parsed->scheme.reset();

	333 after_scheme = begin;

	334 }

	335 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

	336 }

	337

	338 template<typename CHAR>

	339 void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {

	340 DCHECK(spec_len >= 0);

	341

	342 // Get the unused parts of the URL out of the way.

	343 parsed->username.reset();

	344 parsed->password.reset();

	345 parsed->host.reset();

	346 parsed->port.reset();

	347 parsed->path.reset(); // May use this; reset for convenience.

	348 parsed->ref.reset(); // May use this; reset for convenience.

	349 parsed->query.reset(); // May use this; reset for convenience.

	350 parsed->clear_inner_parsed(); // May use this; reset for convenience.

	351

	352 // Strip leading & trailing spaces and control characters.

	353 int begin = 0;

	354 TrimURL(spec, &begin, &spec_len);

	355

	356 // Handle empty specs or ones that contain only whitespace or control chars.

	357 if (begin == spec_len) {

	358 parsed->scheme.reset();

	359 return;

	360 }

	361

	362 int inner_start = -1;

	363

	364 // Extract the scheme. We also handle the case where there is no scheme.

	365 if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

	366 // Offset the results since we gave ExtractScheme a substring.

	367 parsed->scheme.begin += begin;

	368

	369 if (parsed->scheme.end() == spec_len - 1)

	370 return;

	371

	372 inner_start = parsed->scheme.end() + 1;

	373 } else {

	374 // No scheme found; that's not valid for filesystem URLs.

	375 parsed->scheme.reset();

	376 return;

	377 }

	378

	379 url_parse::Component inner_scheme;

	380 const CHAR* inner_spec = &spec[inner_start];

	381 int inner_spec_len = spec_len - inner_start;

	382

	383 if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {

	384 // Offset the results since we gave ExtractScheme a substring.

	385 inner_scheme.begin += inner_start;

	386

	387 if (inner_scheme.end() == spec_len - 1)

	388 return;

	389 } else {

	390 // No scheme found; that's not valid for filesystem URLs.

	391 // The best we can do is return "filesystem://".

	392 return;

	393 }

	394

	395 Parsed inner_parsed;

	396

	397 if (url_util::CompareSchemeComponent(

	398 spec, inner_scheme, url_util::kFileScheme)) {

	399 // File URLs are special.

	400 ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);

	401 } else if (url_util::CompareSchemeComponent(spec, inner_scheme,

	402 url_util::kFileSystemScheme)) {

	403 // Filesystem URLs don't nest.

	404 return;

	405 } else if (url_util::IsStandard(spec, inner_scheme)) {

	406 // All "normal" URLs.

	407 DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);

	408 } else {

	409 return;

	410 }

	411

	412 // All members of inner_parsed need to be offset by inner_start.

	413 // If we had any scheme that supported nesting more than one level deep,

	414 // we'd have to recurse into the inner_parsed's inner_parsed when

	415 // adjusting by inner_start.

	416 inner_parsed.scheme.begin += inner_start;

	417 inner_parsed.username.begin += inner_start;

	418 inner_parsed.password.begin += inner_start;

	419 inner_parsed.host.begin += inner_start;

	420 inner_parsed.port.begin += inner_start;

	421 inner_parsed.query.begin += inner_start;

	422 inner_parsed.ref.begin += inner_start;

	423 inner_parsed.path.begin += inner_start;

	424

	425 // Query and ref move from inner_parsed to parsed.

	426 parsed->query = inner_parsed.query;

	427 inner_parsed.query.reset();

	428 parsed->ref = inner_parsed.ref;

	429 inner_parsed.ref.reset();

	430

	431 parsed->set_inner_parsed(inner_parsed);

	432 if (!inner_parsed.scheme.is_valid() \|\| !inner_parsed.path.is_valid() \|\|

	433 inner_parsed.inner_parsed()) {

	434 return;

	435 }

	436

	437 // The path in inner_parsed should start with a slash, then have a filesystem

	438 // type followed by a slash. From the first slash up to but excluding the

	439 // second should be what it keeps; the rest goes to parsed. If the path ends

	440 // before the second slash, it's still pretty clear what the user meant, so

	441 // we'll let that through.

	442 if (!IsURLSlash(spec[inner_parsed.path.begin])) {

	443 return;

	444 }

	445 int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash

	446 while (inner_path_end < spec_len &&

	447 !IsURLSlash(spec[inner_path_end]))

	448 ++inner_path_end;

	449 parsed->path.begin = inner_path_end;

	450 int new_inner_path_length = inner_path_end - inner_parsed.path.begin;

	451 parsed->path.len = inner_parsed.path.len - new_inner_path_length;

	452 parsed->inner_parsed()->path.len = new_inner_path_length;

	453 }

	454

	455 // Initializes a path URL which is merely a scheme followed by a path. Examples

	456 // include "about:foo" and "javascript:alert('bar');"

	457 template<typename CHAR>

	458 void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {

	459 // Get the non-path and non-scheme parts of the URL out of the way, we never

	460 // use them.

	461 parsed->username.reset();

	462 parsed->password.reset();

	463 parsed->host.reset();

	464 parsed->port.reset();

	465 parsed->query.reset();

	466 parsed->ref.reset();

	467

	468 // Strip leading & trailing spaces and control characters.

	469 int begin = 0;

	470 TrimURL(spec, &begin, &spec_len);

	471

	472 // Handle empty specs or ones that contain only whitespace or control chars.

	473 if (begin == spec_len) {

	474 parsed->scheme.reset();

	475 parsed->path.reset();

	476 return;

	477 }

	478

	479 // Extract the scheme, with the path being everything following. We also

	480 // handle the case where there is no scheme.

	481 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

	482 // Offset the results since we gave ExtractScheme a substring.

	483 parsed->scheme.begin += begin;

	484

	485 // For compatability with the standard URL parser, we treat no path as

	486 // -1, rather than having a length of 0 (we normally wouldn't care so

	487 // much for these non-standard URLs).

	488 if (parsed->scheme.end() == spec_len - 1)

	489 parsed->path.reset();

	490 else

	491 parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);

	492 } else {

	493 // No scheme found, just path.

	494 parsed->scheme.reset();

	495 parsed->path = MakeRange(begin, spec_len);

	496 }

	497 }

	498

	499 template<typename CHAR>

	500 void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {

	501 DCHECK(spec_len >= 0);

	502

	503 // Get the non-path and non-scheme parts of the URL out of the way, we never

	504 // use them.

	505 parsed->username.reset();

	506 parsed->password.reset();

	507 parsed->host.reset();

	508 parsed->port.reset();

	509 parsed->ref.reset();

	510 parsed->query.reset(); // May use this; reset for convenience.

	511

	512 // Strip leading & trailing spaces and control characters.

	513 int begin = 0;

	514 TrimURL(spec, &begin, &spec_len);

	515

	516 // Handle empty specs or ones that contain only whitespace or control chars.

	517 if (begin == spec_len) {

	518 parsed->scheme.reset();

	519 parsed->path.reset();

	520 return;

	521 }

	522

	523 int path_begin = -1;

	524 int path_end = -1;

	525

	526 // Extract the scheme, with the path being everything following. We also

	527 // handle the case where there is no scheme.

	528 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

	529 // Offset the results since we gave ExtractScheme a substring.

	530 parsed->scheme.begin += begin;

	531

	532 if (parsed->scheme.end() != spec_len - 1) {

	533 path_begin = parsed->scheme.end() + 1;

	534 path_end = spec_len;

	535 }

	536 } else {

	537 // No scheme found, just path.

	538 parsed->scheme.reset();

	539 path_begin = begin;

	540 path_end = spec_len;

	541 }

	542

	543 // Split [path_begin, path_end) into a path + query.

	544 for (int i = path_begin; i < path_end; ++i) {

	545 if (spec[i] == '?') {

	546 parsed->query = MakeRange(i + 1, path_end);

	547 path_end = i;

	548 break;

	549 }

	550 }

	551

	552 // For compatability with the standard URL parser, treat no path as

	553 // -1, rather than having a length of 0

	554 if (path_begin == path_end) {

	555 parsed->path.reset();

	556 } else {

	557 parsed->path = MakeRange(path_begin, path_end);

	558 }

	559 }

	560

	561 // Converts a port number in a string to an integer. We'd like to just call

	562 // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,

	563 // we copy the digits to a small stack buffer (since we know the maximum number

	564 // of digits in a valid port number) that we can NULL terminate.

	565 template<typename CHAR>

	566 int DoParsePort(const CHAR* spec, const Component& component) {

	567 // Easy success case when there is no port.

	568 const int kMaxDigits = 5;

	569 if (!component.is_nonempty())

	570 return PORT_UNSPECIFIED;

	571

	572 // Skip over any leading 0s.

	573 Component digits_comp(component.end(), 0);

	574 for (int i = 0; i < component.len; i++) {

	575 if (spec[component.begin + i] != '0') {

	576 digits_comp = MakeRange(component.begin + i, component.end());

	577 break;

	578 }

	579 }

	580 if (digits_comp.len == 0)

	581 return 0; // All digits were 0.

	582

	583 // Verify we don't have too many digits (we'll be copying to our buffer so

	584 // we need to double-check).

	585 if (digits_comp.len > kMaxDigits)

	586 return PORT_INVALID;

	587

	588 // Copy valid digits to the buffer.

	589 char digits[kMaxDigits + 1]; // +1 for null terminator

	590 for (int i = 0; i < digits_comp.len; i++) {

	591 CHAR ch = spec[digits_comp.begin + i];

	592 if (!IsPortDigit(ch)) {

	593 // Invalid port digit, fail.

	594 return PORT_INVALID;

	595 }

	596 digits[i] = static_cast<char>(ch);

	597 }

	598

	599 // Null-terminate the string and convert to integer. Since we guarantee

	600 // only digits, atoi's lack of error handling is OK.

	601 digits[digits_comp.len] = 0;

	602 int port = atoi(digits);

	603 if (port > 65535)

	604 return PORT_INVALID; // Out of range.

	605 return port;

	606 }

	607

	608 template<typename CHAR>

	609 void DoExtractFileName(const CHAR* spec,

	610 const Component& path,

	611 Component* file_name) {

	612 // Handle empty paths: they have no file names.

	613 if (!path.is_nonempty()) {

	614 file_name->reset();

	615 return;

	616 }

	617

	618 // Search backwards for a parameter, which is a normally unused field in a

	619 // URL delimited by a semicolon. We parse the parameter as part of the

	620 // path, but here, we don't want to count it. The last semicolon is the

	621 // parameter. The path should start with a slash, so we don't need to check

	622 // the first one.

	623 int file_end = path.end();

	624 for (int i = path.end() - 1; i > path.begin; i--) {

	625 if (spec[i] == ';') {

	626 file_end = i;

	627 break;

	628 }

	629 }

	630

	631 // Now search backwards from the filename end to the previous slash

	632 // to find the beginning of the filename.

	633 for (int i = file_end - 1; i >= path.begin; i--) {

	634 if (IsURLSlash(spec[i])) {

	635 // File name is everything following this character to the end

	636 *file_name = MakeRange(i + 1, file_end);

	637 return;

	638 }

	639 }

	640

	641 // No slash found, this means the input was degenerate (generally paths

	642 // will start with a slash). Let's call everything the file name.

	643 *file_name = MakeRange(path.begin, file_end);

	644 return;

	645 }

	646

	647 template<typename CHAR>

	648 bool DoExtractQueryKeyValue(const CHAR* spec,

	649 Component* query,

	650 Component* key,

	651 Component* value) {

	652 if (!query->is_nonempty())

	653 return false;

	654

	655 int start = query->begin;

	656 int cur = start;

	657 int end = query->end();

	658

	659 // We assume the beginning of the input is the beginning of the "key" and we

	660 // skip to the end of it.

	661 key->begin = cur;

	662 while (cur < end && spec[cur] != '&' && spec[cur] != '=')

	663 cur++;

	664 key->len = cur - key->begin;

	665

	666 // Skip the separator after the key (if any).

	667 if (cur < end && spec[cur] == '=')

	668 cur++;

	669

	670 // Find the value part.

	671 value->begin = cur;

	672 while (cur < end && spec[cur] != '&')

	673 cur++;

	674 value->len = cur - value->begin;

	675

	676 // Finally skip the next separator if any

	677 if (cur < end && spec[cur] == '&')

	678 cur++;

	679

	680 // Save the new query

	681 *query = url_parse::MakeRange(cur, end);

	682 return true;

	683 }

	684

	685 } // namespace

	686

	687 Parsed::Parsed() : inner_parsed_(NULL) {

	688 }

	689

	690 Parsed::Parsed(const Parsed& other) :

	691 scheme(other.scheme),

	692 username(other.username),

	693 password(other.password),

	694 host(other.host),

	695 port(other.port),

	696 path(other.path),

	697 query(other.query),

	698 ref(other.ref),

	699 inner_parsed_(NULL) {

	700 if (other.inner_parsed_)

	701 set_inner_parsed(*other.inner_parsed_);

	702 }

	703

	704 Parsed& Parsed::operator=(const Parsed& other) {

	705 if (this != &other) {

	706 scheme = other.scheme;

	707 username = other.username;

	708 password = other.password;

	709 host = other.host;

	710 port = other.port;

	711 path = other.path;

	712 query = other.query;

	713 ref = other.ref;

	714 if (other.inner_parsed_)

	715 set_inner_parsed(*other.inner_parsed_);

	716 else

	717 clear_inner_parsed();

	718 }

	719 return *this;

	720 }

	721

	722 Parsed::~Parsed() {

	723 delete inner_parsed_;

	724 }

	725

	726 int Parsed::Length() const {

	727 if (ref.is_valid())

	728 return ref.end();

	729 return CountCharactersBefore(REF, false);

	730 }

	731

	732 int Parsed::CountCharactersBefore(ComponentType type,

	733 bool include_delimiter) const {

	734 if (type == SCHEME)

	735 return scheme.begin;

	736

	737 // There will be some characters after the scheme like "://" and we don't

	738 // know how many. Search forwards for the next thing until we find one.

	739 int cur = 0;

	740 if (scheme.is_valid())

	741 cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme.

	742

	743 if (username.is_valid()) {

	744 if (type <= USERNAME)

	745 return username.begin;

	746 cur = username.end() + 1; // Advance over the '@' or ':' at the end.

	747 }

	748

	749 if (password.is_valid()) {

	750 if (type <= PASSWORD)

	751 return password.begin;

	752 cur = password.end() + 1; // Advance over the '@' at the end.

	753 }

	754

	755 if (host.is_valid()) {

	756 if (type <= HOST)

	757 return host.begin;

	758 cur = host.end();

	759 }

	760

	761 if (port.is_valid()) {

	762 if (type < PORT \|\| (type == PORT && include_delimiter))

	763 return port.begin - 1; // Back over delimiter.

	764 if (type == PORT)

	765 return port.begin; // Don't want delimiter counted.

	766 cur = port.end();

	767 }

	768

	769 if (path.is_valid()) {

	770 if (type <= PATH)

	771 return path.begin;

	772 cur = path.end();

	773 }

	774

	775 if (query.is_valid()) {

	776 if (type < QUERY \|\| (type == QUERY && include_delimiter))

	777 return query.begin - 1; // Back over delimiter.

	778 if (type == QUERY)

	779 return query.begin; // Don't want delimiter counted.

	780 cur = query.end();

	781 }

	782

	783 if (ref.is_valid()) {

	784 if (type == REF && !include_delimiter)

	785 return ref.begin; // Back over delimiter.

	786

	787 // When there is a ref and we get here, the component we wanted was before

	788 // this and not found, so we always know the beginning of the ref is right.

	789 return ref.begin - 1; // Don't want delimiter counted.

	790 }

	791

	792 return cur;

	793 }

	794

	795 bool ExtractScheme(const char* url, int url_len, Component* scheme) {

	796 return DoExtractScheme(url, url_len, scheme);

	797 }

	798

	799 bool ExtractScheme(const char16* url, int url_len, Component* scheme) {

	800 return DoExtractScheme(url, url_len, scheme);

	801 }

	802

	803 // This handles everything that may be an authority terminator, including

	804 // backslash. For special backslash handling see DoParseAfterScheme.

	805 bool IsAuthorityTerminator(char16 ch) {

	806 return IsURLSlash(ch) \|\| ch == '?' \|\| ch == '#';

	807 }

	808

	809 void ExtractFileName(const char* url,

	810 const Component& path,

	811 Component* file_name) {

	812 DoExtractFileName(url, path, file_name);

	813 }

	814

	815 void ExtractFileName(const char16* url,

	816 const Component& path,

	817 Component* file_name) {

	818 DoExtractFileName(url, path, file_name);

	819 }

	820

	821 bool ExtractQueryKeyValue(const char* url,

	822 Component* query,

	823 Component* key,

	824 Component* value) {

	825 return DoExtractQueryKeyValue(url, query, key, value);

	826 }

	827

	828 bool ExtractQueryKeyValue(const char16* url,

	829 Component* query,

	830 Component* key,

	831 Component* value) {

	832 return DoExtractQueryKeyValue(url, query, key, value);

	833 }

	834

	835 void ParseAuthority(const char* spec,

	836 const Component& auth,

	837 Component* username,

	838 Component* password,

	839 Component* hostname,

	840 Component* port_num) {

	841 DoParseAuthority(spec, auth, username, password, hostname, port_num);

	842 }

	843

	844 void ParseAuthority(const char16* spec,

	845 const Component& auth,

	846 Component* username,

	847 Component* password,

	848 Component* hostname,

	849 Component* port_num) {

	850 DoParseAuthority(spec, auth, username, password, hostname, port_num);

	851 }

	852

	853 int ParsePort(const char* url, const Component& port) {

	854 return DoParsePort(url, port);

	855 }

	856

	857 int ParsePort(const char16* url, const Component& port) {

	858 return DoParsePort(url, port);

	859 }

	860

	861 void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {

	862 DoParseStandardURL(url, url_len, parsed);

	863 }

	864

	865 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed) {

	866 DoParseStandardURL(url, url_len, parsed);

	867 }

	868

	869 void ParsePathURL(const char* url, int url_len, Parsed* parsed) {

	870 DoParsePathURL(url, url_len, parsed);

	871 }

	872

	873 void ParsePathURL(const char16* url, int url_len, Parsed* parsed) {

	874 DoParsePathURL(url, url_len, parsed);

	875 }

	876

	877 void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {

	878 DoParseFileSystemURL(url, url_len, parsed);

	879 }

	880

	881 void ParseFileSystemURL(const char16* url, int url_len, Parsed* parsed) {

	882 DoParseFileSystemURL(url, url_len, parsed);

	883 }

	884

	885 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {

	886 DoParseMailtoURL(url, url_len, parsed);

	887 }

	888

	889 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) {

	890 DoParseMailtoURL(url, url_len, parsed);

	891 }

	892

	893 void ParsePathInternal(const char* spec,

	894 const Component& path,

	895 Component* filepath,

	896 Component* query,

	897 Component* ref) {

	898 ParsePath(spec, path, filepath, query, ref);

	899 }

	900

	901 void ParsePathInternal(const char16* spec,

	902 const Component& path,

	903 Component* filepath,

	904 Component* query,

	905 Component* ref) {

	906 ParsePath(spec, path, filepath, query, ref);

	907 }

	908

	909 void ParseAfterScheme(const char* spec,

	910 int spec_len,

	911 int after_scheme,

	912 Parsed* parsed) {

	913 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

	914 }

	915

	916 void ParseAfterScheme(const char16* spec,

	917 int spec_len,

	918 int after_scheme,

	919 Parsed* parsed) {

	920 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

	921 }

	922

	923 } // namespace url_parse

OLD	NEW

« no previous file with comments | « url/url_parse.h ('k') | url/url_parse_file.cc » ('j') | no next file with comments »