url/url_util.cc - Issue 13821004: Move googleurl into the Chrome repo.

Side by Side Diff: url/url_util.cc

Issue 13821004: Move googleurl into the Chrome repo. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright 2007, Google Inc.

	2 // All rights reserved.

	3 //

	4 // Redistribution and use in source and binary forms, with or without

	5 // modification, are permitted provided that the following conditions are

	6 // met:

	7 //

	8 // * Redistributions of source code must retain the above copyright

	9 // notice, this list of conditions and the following disclaimer.

	10 // * Redistributions in binary form must reproduce the above

	11 // copyright notice, this list of conditions and the following disclaimer

	12 // in the documentation and/or other materials provided with the

	13 // distribution.

	14 // * Neither the name of Google Inc. nor the names of its

	15 // contributors may be used to endorse or promote products derived from

	16 // this software without specific prior written permission.

	17 //

	18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29

	30 #include <string.h>

	31 #include <vector>

	32

	33 #include "googleurl/src/url_util.h"

	34

	35 #include "base/logging.h"

	36 #include "googleurl/src/url_canon_internal.h"

	37 #include "googleurl/src/url_file.h"

	38 #include "googleurl/src/url_util_internal.h"

	39

	40 namespace url_util {

	41

	// Names of the schemes that get dedicated parsing/canonicalization paths in
	// this file (see DoCanonicalize and DoReplaceComponents). All lower-case so
	// they can be handed directly to the LowerCaseEqualsASCII helpers.
	const char kFileScheme[] = "file";
	const char kFileSystemScheme[] = "filesystem";
	const char kMailtoScheme[] = "mailto";

	45

	46 namespace {

	47

	// ASCII-specific tolower. The standard library's tolower is locale sensitive,
	// so we don't want to use it here. Characters outside 'A'..'Z' are returned
	// unchanged.
	template <class Char>
	inline Char ToLowerASCII(Char c) {
	  if (c >= 'A' && c <= 'Z') {
	    // The addition is done in int; convert back explicitly.
	    return static_cast<Char>(c + ('a' - 'A'));
	  }
	  return c;
	}

	// Backend for LowerCaseEqualsASCII.
	//
	// Compares the range [a_begin, a_end), lower-cased character by character,
	// against the NUL-terminated string |b|. |b| is compared as-is, so it must
	// already be lower-case for a match to be possible. Returns true only when
	// every character matches and |b| ends exactly where the range ends.
	template <typename Iter>
	inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
	  for (Iter it = a_begin; it != a_end; ++it, ++b) {
	    // Stop on the terminator of |b| (it is shorter than the range) or on the
	    // first mismatching character.
	    if (!*b || ToLowerASCII(*it) != *b)
	      return false;
	  }
	  // The range is exhausted; equal only if |b| is exhausted too.
	  return *b == 0;
	}

	63

	64 const int kNumStandardURLSchemes = 8;

	65 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {

	66 "http",

	67 "https",

	68 kFileScheme, // Yes, file urls can have a hostname!

	69 "ftp",

	70 "gopher",

	71 "ws", // WebSocket.

	72 "wss", // WebSocket secure.

	73 kFileSystemScheme,

	74 };

	75

	// List of the currently installed standard schemes. NULL until
	// InitStandardSchemes() allocates it on first use. It is held through a raw
	// pointer rather than as a static object so that no destructor runs
	// implicitly at exit; Shutdown() releases it explicitly.
	std::vector<const char*>* standard_schemes = NULL;

	// Set by LockStandardSchemes(); AddStandardScheme() DCHECKs that it is still
	// false. See the LockStandardSchemes declaration in the header.
	bool standard_schemes_locked = false;

	83

	84 // Ensures that the standard_schemes list is initialized, does nothing if it

	85 // already has values.

	86 void InitStandardSchemes() {

	87 if (standard_schemes)

	88 return;

	89 standard_schemes = new std::vector<const char*>;

	90 for (int i = 0; i < kNumStandardURLSchemes; i++)

	91 standard_schemes->push_back(kStandardURLSchemes[i]);

	92 }

	93

	94 // Given a string and a range inside the string, compares it to the given

	95 // lower-case \|compare_to\| buffer.

	96 template<typename CHAR>

	97 inline bool DoCompareSchemeComponent(const CHAR* spec,

	98 const url_parse::Component& component,

	99 const char* compare_to) {

	100 if (!component.is_nonempty())

	101 return compare_to[0] == 0; // When component is empty, match empty scheme.

	102 return LowerCaseEqualsASCII(&spec[component.begin],

	103 &spec[component.end()],

	104 compare_to);

	105 }

	106

	107 // Returns true if the given scheme identified by \|scheme\| within \|spec\| is one

	108 // of the registered "standard" schemes.

	109 template<typename CHAR>

	110 bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {

	111 if (!scheme.is_nonempty())

	112 return false; // Empty or invalid schemes are non-standard.

	113

	114 InitStandardSchemes();

	115 for (size_t i = 0; i < standard_schemes->size(); i++) {

	116 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],

	117 standard_schemes->at(i)))

	118 return true;

	119 }

	120 return false;

	121 }

	122

	123 template<typename CHAR>

	124 bool DoFindAndCompareScheme(const CHAR* str,

	125 int str_len,

	126 const char* compare,

	127 url_parse::Component* found_scheme) {

	128 // Before extracting scheme, canonicalize the URL to remove any whitespace.

	129 // This matches the canonicalization done in DoCanonicalize function.

	130 url_canon::RawCanonOutputT<CHAR> whitespace_buffer;

	131 int spec_len;

	132 const CHAR* spec = RemoveURLWhitespace(str, str_len,

	133 &whitespace_buffer, &spec_len);

	134

	135 url_parse::Component our_scheme;

	136 if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) {

	137 // No scheme.

	138 if (found_scheme)

	139 *found_scheme = url_parse::Component();

	140 return false;

	141 }

	142 if (found_scheme)

	143 *found_scheme = our_scheme;

	144 return DoCompareSchemeComponent(spec, our_scheme, compare);

	145 }

	146

	147 template<typename CHAR>

	148 bool DoCanonicalize(const CHAR* in_spec, int in_spec_len,

	149 url_canon::CharsetConverter* charset_converter,

	150 url_canon::CanonOutput* output,

	151 url_parse::Parsed* output_parsed) {

	152 // Remove any whitespace from the middle of the relative URL, possibly

	153 // copying to the new buffer.

	154 url_canon::RawCanonOutputT<CHAR> whitespace_buffer;

	155 int spec_len;

	156 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,

	157 &whitespace_buffer, &spec_len);

	158

	159 url_parse::Parsed parsed_input;

	160 #ifdef WIN32

	161 // For Windows, we allow things that look like absolute Windows paths to be

	162 // fixed up magically to file URLs. This is done for IE compatability. For

	163 // example, this will change "c:/foo" into a file URL rather than treating

	164 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").

	165 // There is similar logic in url_canon_relative.cc for

	166 //

	167 // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which

	168 // has no meaning as an absolute path name. This is because browsers on Mac

	169 // & Unix don't generally do this, so there is no compatibility reason for

	170 // doing so.

	171 if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) \|\|

	172 url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {

	173 url_parse::ParseFileURL(spec, spec_len, &parsed_input);

	174 return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,

	175 charset_converter,

	176 output, output_parsed);

	177 }

	178 #endif

	179

	180 url_parse::Component scheme;

	181 if (!url_parse::ExtractScheme(spec, spec_len, &scheme))

	182 return false;

	183

	184 // This is the parsed version of the input URL, we have to canonicalize it

	185 // before storing it in our object.

	186 bool success;

	187 if (DoCompareSchemeComponent(spec, scheme, kFileScheme)) {

	188 // File URLs are special.

	189 url_parse::ParseFileURL(spec, spec_len, &parsed_input);

	190 success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,

	191 charset_converter, output,

	192 output_parsed);

	193 } else if (DoCompareSchemeComponent(spec, scheme, kFileSystemScheme)) {

	194 // Filesystem URLs are special.

	195 url_parse::ParseFileSystemURL(spec, spec_len, &parsed_input);

	196 success = url_canon::CanonicalizeFileSystemURL(spec, spec_len,

	197 parsed_input,

	198 charset_converter,

	199 output, output_parsed);

	200

	201 } else if (DoIsStandard(spec, scheme)) {

	202 // All "normal" URLs.

	203 url_parse::ParseStandardURL(spec, spec_len, &parsed_input);

	204 success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,

	205 charset_converter,

	206 output, output_parsed);

	207

	208 } else if (DoCompareSchemeComponent(spec, scheme, kMailtoScheme)) {

	209 // Mailto are treated like a standard url with only a scheme, path, query

	210 url_parse::ParseMailtoURL(spec, spec_len, &parsed_input);

	211 success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input,

	212 output, output_parsed);

	213

	214 } else {

	215 // "Weird" URLs like data: and javascript:

	216 url_parse::ParsePathURL(spec, spec_len, &parsed_input);

	217 success = url_canon::CanonicalizePathURL(spec, spec_len, parsed_input,

	218 output, output_parsed);

	219 }

	220 return success;

	221 }

	222

	223 template<typename CHAR>

	224 bool DoResolveRelative(const char* base_spec,

	225 int base_spec_len,

	226 const url_parse::Parsed& base_parsed,

	227 const CHAR* in_relative,

	228 int in_relative_length,

	229 url_canon::CharsetConverter* charset_converter,

	230 url_canon::CanonOutput* output,

	231 url_parse::Parsed* output_parsed) {

	232 // Remove any whitespace from the middle of the relative URL, possibly

	233 // copying to the new buffer.

	234 url_canon::RawCanonOutputT<CHAR> whitespace_buffer;

	235 int relative_length;

	236 const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,

	237 &whitespace_buffer,

	238 &relative_length);

	239 bool base_is_authority_based = false;

	240 bool base_is_hierarchical = false;

	241 if (base_spec &&

	242 base_parsed.scheme.is_nonempty()) {

	243 int after_scheme = base_parsed.scheme.end() + 1; // Skip past the colon.

	244 int num_slashes = url_parse::CountConsecutiveSlashes(

	245 base_spec, after_scheme, base_spec_len);

	246 base_is_authority_based = num_slashes > 1;

	247 base_is_hierarchical = num_slashes > 0;

	248 }

	249

	250 bool standard_base_scheme =

	251 base_parsed.scheme.is_nonempty() &&

	252 DoIsStandard(base_spec, base_parsed.scheme);

	253

	254 bool is_relative;

	255 url_parse::Component relative_component;

	256 if (!url_canon::IsRelativeURL(base_spec, base_parsed,

	257 relative, relative_length,

	258 (base_is_hierarchical \|\| standard_base_scheme),

	259 &is_relative,

	260 &relative_component)) {

	261 // Error resolving.

	262 return false;

	263 }

	264

	265 // Pretend for a moment that \|base_spec\| is a standard URL. Normally

	266 // non-standard URLs are treated as PathURLs, but if the base has an

	267 // authority we would like to preserve it.

	268 if (is_relative && base_is_authority_based && !standard_base_scheme) {

	269 url_parse::Parsed base_parsed_authority;

	270 ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);

	271 if (base_parsed_authority.host.is_nonempty()) {

	272 bool did_resolve_succeed =

	273 url_canon::ResolveRelativeURL(base_spec, base_parsed_authority,

	274 false, relative,

	275 relative_component, charset_converter,

	276 output, output_parsed);

	277 // The output_parsed is incorrect at this point (because it was built

	278 // based on base_parsed_authority instead of base_parsed) and needs to be

	279 // re-created.

	280 ParsePathURL(output->data(), output->length(), output_parsed);

	281 return did_resolve_succeed;

	282 }

	283 } else if (is_relative) {

	284 // Relative, resolve and canonicalize.

	285 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&

	286 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);

	287 return url_canon::ResolveRelativeURL(base_spec, base_parsed,

	288 file_base_scheme, relative,

	289 relative_component, charset_converter,

	290 output, output_parsed);

	291 }

	292

	293 // Not relative, canonicalize the input.

	294 return DoCanonicalize(relative, relative_length, charset_converter,

	295 output, output_parsed);

	296 }

	297

	298 template<typename CHAR>

	299 bool DoReplaceComponents(const char* spec,

	300 int spec_len,

	301 const url_parse::Parsed& parsed,

	302 const url_canon::Replacements<CHAR>& replacements,

	303 url_canon::CharsetConverter* charset_converter,

	304 url_canon::CanonOutput* output,

	305 url_parse::Parsed* out_parsed) {

	306 // If the scheme is overridden, just do a simple string substitution and

	307 // reparse the whole thing. There are lots of edge cases that we really don't

	308 // want to deal with. Like what happens if I replace "http://e:8080/foo"

	309 // with a file. Does it become "file:///E:/8080/foo" where the port number

	310 // becomes part of the path? Parsing that string as a file URL says "yes"

	311 // but almost no sane rule for dealing with the components individually would

	312 // come up with that.

	313 //

	314 // Why allow these crazy cases at all? Programatically, there is almost no

	315 // case for replacing the scheme. The most common case for hitting this is

	316 // in JS when building up a URL using the location object. In this case, the

	317 // JS code expects the string substitution behavior:

	318 // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3

	319 if (replacements.IsSchemeOverridden()) {

	320 // Canonicalize the new scheme so it is 8-bit and can be concatenated with

	321 // the existing spec.

	322 url_canon::RawCanonOutput<128> scheme_replaced;

	323 url_parse::Component scheme_replaced_parsed;

	324 url_canon::CanonicalizeScheme(

	325 replacements.sources().scheme,

	326 replacements.components().scheme,

	327 &scheme_replaced, &scheme_replaced_parsed);

	328

	329 // We can assume that the input is canonicalized, which means it always has

	330 // a colon after the scheme (or where the scheme would be).

	331 int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1

	332 : 1;

	333 if (spec_len - spec_after_colon > 0) {

	334 scheme_replaced.Append(&spec[spec_after_colon],

	335 spec_len - spec_after_colon);

	336 }

	337

	338 // We now need to completely re-parse the resulting string since its meaning

	339 // may have changed with the different scheme.

	340 url_canon::RawCanonOutput<128> recanonicalized;

	341 url_parse::Parsed recanonicalized_parsed;

	342 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),

	343 charset_converter,

	344 &recanonicalized, &recanonicalized_parsed);

	345

	346 // Recurse using the version with the scheme already replaced. This will now

	347 // use the replacement rules for the new scheme.

	348 //

	349 // Warning: this code assumes that ReplaceComponents will re-check all

	350 // components for validity. This is because we can't fail if DoCanonicalize

	351 // failed above since theoretically the thing making it fail could be

	352 // getting replaced here. If ReplaceComponents didn't re-check everything,

	353 // we wouldn't know if something not getting replaced is a problem.

	354 // If the scheme-specific replacers are made more intelligent so they don't

	355 // re-check everything, we should instead recanonicalize the whole thing

	356 // after this call to check validity (this assumes replacing the scheme is

	357 // much much less common than other types of replacements, like clearing the

	358 // ref).

	359 url_canon::Replacements<CHAR> replacements_no_scheme = replacements;

	360 replacements_no_scheme.SetScheme(NULL, url_parse::Component());

	361 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),

	362 recanonicalized_parsed, replacements_no_scheme,

	363 charset_converter, output, out_parsed);

	364 }

	365

	366 // If we get here, then we know the scheme doesn't need to be replaced, so can

	367 // just key off the scheme in the spec to know how to do the replacements.

	368 if (DoCompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {

	369 return url_canon::ReplaceFileURL(spec, parsed, replacements,

	370 charset_converter, output, out_parsed);

	371 }

	372 if (DoCompareSchemeComponent(spec, parsed.scheme, kFileSystemScheme)) {

	373 return url_canon::ReplaceFileSystemURL(spec, parsed, replacements,

	374 charset_converter, output,

	375 out_parsed);

	376 }

	377 if (DoIsStandard(spec, parsed.scheme)) {

	378 return url_canon::ReplaceStandardURL(spec, parsed, replacements,

	379 charset_converter, output, out_parsed);

	380 }

	381 if (DoCompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {

	382 return url_canon::ReplaceMailtoURL(spec, parsed, replacements,

	383 output, out_parsed);

	384 }

	385

	386 // Default is a path URL.

	387 return url_canon::ReplacePathURL(spec, parsed, replacements,

	388 output, out_parsed);

	389 }

	390

	391 } // namespace

	392

	393 void Initialize() {

	394 InitStandardSchemes();

	395 }

	396

	397 void Shutdown() {

	398 if (standard_schemes) {

	399 delete standard_schemes;

	400 standard_schemes = NULL;

	401 }

	402 }

	403

	404 void AddStandardScheme(const char* new_scheme) {

	405 // If this assert triggers, it means you've called AddStandardScheme after

	406 // LockStandardSchemes have been called (see the header file for

	407 // LockStandardSchemes for more).

	408 //

	409 // This normally means you're trying to set up a new standard scheme too late

	410 // in your application's init process. Locate where your app does this

	411 // initialization and calls LockStandardScheme, and add your new standard

	412 // scheme there.

	413 DCHECK(!standard_schemes_locked) <<

	414 "Trying to add a standard scheme after the list has been locked.";

	415

	416 size_t scheme_len = strlen(new_scheme);

	417 if (scheme_len == 0)

	418 return;

	419

	420 // Dulicate the scheme into a new buffer and add it to the list of standard

	421 // schemes. This pointer will be leaked on shutdown.

	422 char* dup_scheme = new char[scheme_len + 1];

	423 memcpy(dup_scheme, new_scheme, scheme_len + 1);

	424

	425 InitStandardSchemes();

	426 standard_schemes->push_back(dup_scheme);

	427 }

	428

	429 void LockStandardSchemes() {

	430 standard_schemes_locked = true;

	431 }

	432

	433 bool IsStandard(const char* spec, const url_parse::Component& scheme) {

	434 return DoIsStandard(spec, scheme);

	435 }

	436

	437 bool IsStandard(const char16* spec, const url_parse::Component& scheme) {

	438 return DoIsStandard(spec, scheme);

	439 }

	440

	441 bool FindAndCompareScheme(const char* str,

	442 int str_len,

	443 const char* compare,

	444 url_parse::Component* found_scheme) {

	445 return DoFindAndCompareScheme(str, str_len, compare, found_scheme);

	446 }

	447

	448 bool FindAndCompareScheme(const char16* str,

	449 int str_len,

	450 const char* compare,

	451 url_parse::Component* found_scheme) {

	452 return DoFindAndCompareScheme(str, str_len, compare, found_scheme);

	453 }

	454

	455 bool Canonicalize(const char* spec,

	456 int spec_len,

	457 url_canon::CharsetConverter* charset_converter,

	458 url_canon::CanonOutput* output,

	459 url_parse::Parsed* output_parsed) {

	460 return DoCanonicalize(spec, spec_len, charset_converter,

	461 output, output_parsed);

	462 }

	463

	464 bool Canonicalize(const char16* spec,

	465 int spec_len,

	466 url_canon::CharsetConverter* charset_converter,

	467 url_canon::CanonOutput* output,

	468 url_parse::Parsed* output_parsed) {

	469 return DoCanonicalize(spec, spec_len, charset_converter,

	470 output, output_parsed);

	471 }

	472

	473 bool ResolveRelative(const char* base_spec,

	474 int base_spec_len,

	475 const url_parse::Parsed& base_parsed,

	476 const char* relative,

	477 int relative_length,

	478 url_canon::CharsetConverter* charset_converter,

	479 url_canon::CanonOutput* output,

	480 url_parse::Parsed* output_parsed) {

	481 return DoResolveRelative(base_spec, base_spec_len, base_parsed,

	482 relative, relative_length,

	483 charset_converter, output, output_parsed);

	484 }

	485

	486 bool ResolveRelative(const char* base_spec,

	487 int base_spec_len,

	488 const url_parse::Parsed& base_parsed,

	489 const char16* relative,

	490 int relative_length,

	491 url_canon::CharsetConverter* charset_converter,

	492 url_canon::CanonOutput* output,

	493 url_parse::Parsed* output_parsed) {

	494 return DoResolveRelative(base_spec, base_spec_len, base_parsed,

	495 relative, relative_length,

	496 charset_converter, output, output_parsed);

	497 }

	498

	499 bool ReplaceComponents(const char* spec,

	500 int spec_len,

	501 const url_parse::Parsed& parsed,

	502 const url_canon::Replacements<char>& replacements,

	503 url_canon::CharsetConverter* charset_converter,

	504 url_canon::CanonOutput* output,

	505 url_parse::Parsed* out_parsed) {

	506 return DoReplaceComponents(spec, spec_len, parsed, replacements,

	507 charset_converter, output, out_parsed);

	508 }

	509

	510 bool ReplaceComponents(const char* spec,

	511 int spec_len,

	512 const url_parse::Parsed& parsed,

	513 const url_canon::Replacements<char16>& replacements,

	514 url_canon::CharsetConverter* charset_converter,

	515 url_canon::CanonOutput* output,

	516 url_parse::Parsed* out_parsed) {

	517 return DoReplaceComponents(spec, spec_len, parsed, replacements,

	518 charset_converter, output, out_parsed);

	519 }

	520

	521 // Front-ends for LowerCaseEqualsASCII.

	522 bool LowerCaseEqualsASCII(const char* a_begin,

	523 const char* a_end,

	524 const char* b) {

	525 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

	526 }

	527

	528 bool LowerCaseEqualsASCII(const char* a_begin,

	529 const char* a_end,

	530 const char* b_begin,

	531 const char* b_end) {

	532 while (a_begin != a_end && b_begin != b_end &&

	533 ToLowerASCII(a_begin) == b_begin) {

	534 a_begin++;

	535 b_begin++;

	536 }

	537 return a_begin == a_end && b_begin == b_end;

	538 }

	539

	540 bool LowerCaseEqualsASCII(const char16* a_begin,

	541 const char16* a_end,

	542 const char* b) {

	543 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

	544 }

	545

	546 void DecodeURLEscapeSequences(const char* input, int length,

	547 url_canon::CanonOutputW* output) {

	548 url_canon::RawCanonOutputT<char> unescaped_chars;

	549 for (int i = 0; i < length; i++) {

	550 if (input[i] == '%') {

	551 unsigned char ch;

	552 if (url_canon::DecodeEscaped(input, &i, length, &ch)) {

	553 unescaped_chars.push_back(ch);

	554 } else {

	555 // Invalid escape sequence, copy the percent literal.

	556 unescaped_chars.push_back('%');

	557 }

	558 } else {

	559 // Regular non-escaped 8-bit character.

	560 unescaped_chars.push_back(input[i]);

	561 }

	562 }

	563

	564 // Convert that 8-bit to UTF-16. It's not clear IE does this at all to

	565 // JavaScript URLs, but Firefox and Safari do.

	566 for (int i = 0; i < unescaped_chars.length(); i++) {

	567 unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));

	568 if (uch < 0x80) {

	569 // Non-UTF-8, just append directly

	570 output->push_back(uch);

	571 } else {

	572 // next_ch will point to the last character of the decoded

	573 // character.

	574 int next_character = i;

	575 unsigned code_point;

	576 if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character,

	577 unescaped_chars.length(), &code_point)) {

	578 // Valid UTF-8 character, convert to UTF-16.

	579 url_canon::AppendUTF16Value(code_point, output);

	580 i = next_character;

	581 } else {

	582 // If there are any sequences that are not valid UTF-8, we keep

	583 // invalid code points and promote to UTF-16. We copy all characters

	584 // from the current position to the end of the identified sequence.

	585 while (i < next_character) {

	586 output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));

	587 i++;

	588 }

	589 output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));

	590 }

	591 }

	592 }

	593 }

	594

	595 void EncodeURIComponent(const char* input, int length,

	596 url_canon::CanonOutput* output) {

	597 for (int i = 0; i < length; ++i) {

	598 unsigned char c = static_cast<unsigned char>(input[i]);

	599 if (url_canon::IsComponentChar(c))

	600 output->push_back(c);

	601 else

	602 AppendEscapedChar(c, output);

	603 }

	604 }

	605

	606 bool CompareSchemeComponent(const char* spec,

	607 const url_parse::Component& component,

	608 const char* compare_to) {

	609 return DoCompareSchemeComponent(spec, component, compare_to);

	610 }

	611

	612 bool CompareSchemeComponent(const char16* spec,

	613 const url_parse::Component& component,

	614 const char* compare_to) {

	615 return DoCompareSchemeComponent(spec, component, compare_to);

	616 }

	617

	618 } // namespace url_util

OLD	NEW

« no previous file with comments | « url/url_util.h ('k') | url/url_util_internal.h » ('j') | no next file with comments »