| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2004, 2007, 2008, 2011, 2012 Apple Inc. All rights reserved. | |
| 3 * Copyright (C) 2012 Research In Motion Limited. All rights reserved. | |
| 4 * Copyright (C) 2008, 2009, 2011 Google Inc. All rights reserved. | |
| 5 * | |
| 6 * Redistribution and use in source and binary forms, with or without | |
| 7 * modification, are permitted provided that the following conditions | |
| 8 * are met: | |
| 9 * 1. Redistributions of source code must retain the above copyright | |
| 10 * notice, this list of conditions and the following disclaimer. | |
| 11 * 2. Redistributions in binary form must reproduce the above copyright | |
| 12 * notice, this list of conditions and the following disclaimer in the | |
| 13 * documentation and/or other materials provided with the distribution. | |
| 14 * | |
| 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
| 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | |
| 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 26 */ | |
| 27 | |
| 28 #include "config.h" | |
| 29 #include "weborigin/KURL.h" | |
| 30 | |
| 31 #include "weborigin/KnownPorts.h" | |
| 32 #include "wtf/HashMap.h" | |
| 33 #include "wtf/StdLibExtras.h" | |
| 34 #include "wtf/text/CString.h" | |
| 35 #include "wtf/text/StringHash.h" | |
| 36 #include "wtf/text/StringUTF8Adaptor.h" | |
| 37 #include "wtf/text/TextEncoding.h" | |
| 38 #include <algorithm> | |
| 39 #include <url/url_util.h> | |
| 40 #ifndef NDEBUG | |
| 41 #include <stdio.h> | |
| 42 #endif | |
| 43 | |
| 44 namespace WebCore { | |
| 45 | |
| 46 static const int maximumValidPortNumber = 0xFFFE; | |
| 47 static const int invalidPortNumber = 0xFFFF; | |
| 48 | |
| 49 static void assertProtocolIsGood(const char* protocol) | |
| 50 { | |
| 51 #ifndef NDEBUG | |
| 52 const char* p = protocol; | |
| 53 while (*p) { | |
| 54 ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z')); | |
| 55 ++p; | |
| 56 } | |
| 57 #endif | |
| 58 } | |
| 59 | |
| 60 // Note: You must ensure that |spec| is a valid canonicalized URL before calling
this function. | |
| 61 static const char* asURLChar8Subtle(const String& spec) | |
| 62 { | |
| 63 ASSERT(spec.is8Bit()); | |
| 64 // characters8 really return characters in Latin-1, but because we canonical
ize | |
| 65 // URL strings, we know that everything before the fragment identifier will | |
| 66 // actually be ASCII, which means this cast is safe as long as you don't loo
k | |
| 67 // at the fragment component. | |
| 68 return reinterpret_cast<const char*>(spec.characters8()); | |
| 69 } | |
| 70 | |
| 71 // Returns the characters for the given string, or a pointer to a static empty | |
| 72 // string if the input string is null. This will always ensure we have a non- | |
| 73 // null character pointer since ReplaceComponents has special meaning for null. | |
| 74 static const char* charactersOrEmpty(const StringUTF8Adaptor& string) | |
| 75 { | |
| 76 static const char zero = 0; | |
| 77 return string.data() ? string.data() : &zero; | |
| 78 } | |
| 79 | |
// True when |c| is an ASCII letter (either case), i.e. a character that may
// open a URL scheme.
static bool isSchemeFirstChar(char c)
{
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
| 84 | |
| 85 static bool isSchemeChar(char c) | |
| 86 { | |
| 87 return isSchemeFirstChar(c) || (c >= '0' && c <= '9') || c == '.' || c == '-
' || c == '+'; | |
| 88 } | |
| 89 | |
| 90 static bool isUnicodeEncoding(const WTF::TextEncoding* encoding) | |
| 91 { | |
| 92 return encoding->encodingForFormSubmission() == UTF8Encoding(); | |
| 93 } | |
| 94 | |
| 95 namespace { | |
| 96 | |
| 97 class KURLCharsetConverter : public url_canon::CharsetConverter { | |
| 98 public: | |
| 99 // The encoding parameter may be 0, but in this case the object must not be
called. | |
| 100 explicit KURLCharsetConverter(const WTF::TextEncoding* encoding) | |
| 101 : m_encoding(encoding) | |
| 102 { | |
| 103 } | |
| 104 | |
| 105 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLe
ngth, url_canon::CanonOutput* output) | |
| 106 { | |
| 107 CString encoded = m_encoding->normalizeAndEncode(String(input, inputLeng
th), WTF::URLEncodedEntitiesForUnencodables); | |
| 108 output->Append(encoded.data(), static_cast<int>(encoded.length())); | |
| 109 } | |
| 110 | |
| 111 private: | |
| 112 const WTF::TextEncoding* m_encoding; | |
| 113 }; | |
| 114 | |
| 115 } // namespace | |
| 116 | |
| 117 bool isValidProtocol(const String& protocol) | |
| 118 { | |
| 119 // RFC3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
| 120 if (protocol.isEmpty()) | |
| 121 return false; | |
| 122 if (!isSchemeFirstChar(protocol[0])) | |
| 123 return false; | |
| 124 unsigned protocolLength = protocol.length(); | |
| 125 for (unsigned i = 1; i < protocolLength; i++) { | |
| 126 if (!isSchemeChar(protocol[i])) | |
| 127 return false; | |
| 128 } | |
| 129 return true; | |
| 130 } | |
| 131 | |
| 132 String KURL::strippedForUseAsReferrer() const | |
| 133 { | |
| 134 KURL referrer(*this); | |
| 135 referrer.setUser(String()); | |
| 136 referrer.setPass(String()); | |
| 137 referrer.removeFragmentIdentifier(); | |
| 138 return referrer.string(); | |
| 139 } | |
| 140 | |
| 141 bool KURL::isLocalFile() const | |
| 142 { | |
| 143 // Including feed here might be a bad idea since drag and drop uses this che
ck | |
| 144 // and including feed would allow feeds to potentially let someone's blog | |
| 145 // read the contents of the clipboard on a drag, even without a drop. | |
| 146 // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function. | |
| 147 return protocolIs("file"); | |
| 148 } | |
| 149 | |
| 150 bool protocolIsJavaScript(const String& url) | |
| 151 { | |
| 152 return protocolIs(url, "javascript"); | |
| 153 } | |
| 154 | |
| 155 const KURL& blankURL() | |
| 156 { | |
| 157 DEFINE_STATIC_LOCAL(KURL, staticBlankURL, (ParsedURLString, "about:blank")); | |
| 158 return staticBlankURL; | |
| 159 } | |
| 160 | |
| 161 bool KURL::isBlankURL() const | |
| 162 { | |
| 163 return protocolIs("about"); | |
| 164 } | |
| 165 | |
| 166 String KURL::elidedString() const | |
| 167 { | |
| 168 if (string().length() <= 1024) | |
| 169 return string(); | |
| 170 | |
| 171 return string().left(511) + "..." + string().right(510); | |
| 172 } | |
| 173 | |
| 174 // Initializes with a string representing an absolute URL. No encoding | |
| 175 // information is specified. This generally happens when a KURL is converted | |
| 176 // to a string and then converted back. In this case, the URL is already | |
| 177 // canonical and in proper escaped form so needs no encoding. We treat it as | |
| 178 // UTF-8 just in case. | |
| 179 KURL::KURL(ParsedURLStringTag, const String& url) | |
| 180 { | |
| 181 if (!url.isNull()) | |
| 182 init(KURL(), url, 0); | |
| 183 else { | |
| 184 // WebCore expects us to preserve the nullness of strings when this | |
| 185 // constructor is used. In all other cases, it expects a non-null | |
| 186 // empty string, which is what init() will create. | |
| 187 m_isValid = false; | |
| 188 m_protocolIsInHTTPFamily = false; | |
| 189 } | |
| 190 } | |
| 191 | |
| 192 KURL KURL::createIsolated(ParsedURLStringTag, const String& url) | |
| 193 { | |
| 194 // FIXME: We should be able to skip this extra copy and created an | |
| 195 // isolated KURL more efficiently. | |
| 196 return KURL(ParsedURLString, url).copy(); | |
| 197 } | |
| 198 | |
| 199 // Constructs a new URL given a base URL and a possibly relative input URL. | |
| 200 // This assumes UTF-8 encoding. | |
| 201 KURL::KURL(const KURL& base, const String& relative) | |
| 202 { | |
| 203 init(base, relative, 0); | |
| 204 } | |
| 205 | |
| 206 // Constructs a new URL given a base URL and a possibly relative input URL. | |
| 207 // Any query portion of the relative URL will be encoded in the given encoding. | |
| 208 KURL::KURL(const KURL& base, const String& relative, const WTF::TextEncoding& en
coding) | |
| 209 { | |
| 210 init(base, relative, &encoding.encodingForFormSubmission()); | |
| 211 } | |
| 212 | |
| 213 KURL::KURL(const AtomicString& canonicalString, const url_parse::Parsed& parsed,
bool isValid) | |
| 214 : m_isValid(isValid) | |
| 215 , m_protocolIsInHTTPFamily(false) | |
| 216 , m_parsed(parsed) | |
| 217 , m_string(canonicalString) | |
| 218 { | |
| 219 initProtocolIsInHTTPFamily(); | |
| 220 initInnerURL(); | |
| 221 } | |
| 222 | |
| 223 KURL::KURL(WTF::HashTableDeletedValueType) | |
| 224 : m_isValid(false) | |
| 225 , m_protocolIsInHTTPFamily(false) | |
| 226 , m_string(WTF::HashTableDeletedValue) | |
| 227 { | |
| 228 } | |
| 229 | |
| 230 KURL::KURL(const KURL& other) | |
| 231 : m_isValid(other.m_isValid) | |
| 232 , m_protocolIsInHTTPFamily(other.m_protocolIsInHTTPFamily) | |
| 233 , m_parsed(other.m_parsed) | |
| 234 , m_string(other.m_string) | |
| 235 { | |
| 236 if (other.m_innerURL.get()) | |
| 237 m_innerURL = adoptPtr(new KURL(other.m_innerURL->copy())); | |
| 238 } | |
| 239 | |
| 240 KURL& KURL::operator=(const KURL& other) | |
| 241 { | |
| 242 m_isValid = other.m_isValid; | |
| 243 m_protocolIsInHTTPFamily = other.m_protocolIsInHTTPFamily; | |
| 244 m_parsed = other.m_parsed; | |
| 245 m_string = other.m_string; | |
| 246 if (other.m_innerURL) | |
| 247 m_innerURL = adoptPtr(new KURL(other.m_innerURL->copy())); | |
| 248 else | |
| 249 m_innerURL.clear(); | |
| 250 return *this; | |
| 251 } | |
| 252 | |
| 253 KURL KURL::copy() const | |
| 254 { | |
| 255 KURL result; | |
| 256 result.m_isValid = m_isValid; | |
| 257 result.m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily; | |
| 258 result.m_parsed = m_parsed; | |
| 259 result.m_string = m_string.isolatedCopy(); | |
| 260 if (result.m_innerURL) | |
| 261 result.m_innerURL = adoptPtr(new KURL(m_innerURL->copy())); | |
| 262 return result; | |
| 263 } | |
| 264 | |
| 265 bool KURL::isNull() const | |
| 266 { | |
| 267 return m_string.isNull(); | |
| 268 } | |
| 269 | |
| 270 bool KURL::isEmpty() const | |
| 271 { | |
| 272 return m_string.isEmpty(); | |
| 273 } | |
| 274 | |
| 275 bool KURL::isValid() const | |
| 276 { | |
| 277 return m_isValid; | |
| 278 } | |
| 279 | |
| 280 bool KURL::hasPort() const | |
| 281 { | |
| 282 return hostEnd() < pathStart(); | |
| 283 } | |
| 284 | |
| 285 bool KURL::protocolIsInHTTPFamily() const | |
| 286 { | |
| 287 return m_protocolIsInHTTPFamily; | |
| 288 } | |
| 289 | |
| 290 bool KURL::hasPath() const | |
| 291 { | |
| 292 // Note that http://www.google.com/" has a path, the path is "/". This can | |
| 293 // return false only for invalid or nonstandard URLs. | |
| 294 return m_parsed.path.len >= 0; | |
| 295 } | |
| 296 | |
| 297 // We handle "parameters" separated by a semicolon, while KURL.cpp does not, | |
| 298 // which can lead to different results in some cases. | |
| 299 String KURL::lastPathComponent() const | |
| 300 { | |
| 301 if (!m_isValid) | |
| 302 return stringForInvalidComponent(); | |
| 303 ASSERT(!m_string.isNull()); | |
| 304 | |
| 305 // When the output ends in a slash, WebCore has different expectations than | |
| 306 // the GoogleURL library. For "/foo/bar/" the library will return the empty | |
| 307 // string, but WebCore wants "bar". | |
| 308 url_parse::Component path = m_parsed.path; | |
| 309 if (path.len > 0 && m_string[path.end() - 1] == '/') | |
| 310 path.len--; | |
| 311 | |
| 312 url_parse::Component file; | |
| 313 if (m_string.is8Bit()) | |
| 314 url_parse::ExtractFileName(asURLChar8Subtle(m_string), path, &file); | |
| 315 else | |
| 316 url_parse::ExtractFileName(m_string.characters16(), path, &file); | |
| 317 | |
| 318 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns | |
| 319 // a null string when the path is empty, which we duplicate here. | |
| 320 if (!file.is_nonempty()) | |
| 321 return String(); | |
| 322 return componentString(file); | |
| 323 } | |
| 324 | |
| 325 String KURL::protocol() const | |
| 326 { | |
| 327 return componentString(m_parsed.scheme); | |
| 328 } | |
| 329 | |
| 330 String KURL::host() const | |
| 331 { | |
| 332 return componentString(m_parsed.host); | |
| 333 } | |
| 334 | |
| 335 // Returns 0 when there is no port. | |
| 336 // | |
| 337 // We treat URL's with out-of-range port numbers as invalid URLs, and they will | |
| 338 // be rejected by the canonicalizer. KURL.cpp will allow them in parsing, but | |
| 339 // return invalidPortNumber from this port() function, so we mirror that behavio
r here. | |
| 340 unsigned short KURL::port() const | |
| 341 { | |
| 342 if (!m_isValid || m_parsed.port.len <= 0) | |
| 343 return 0; | |
| 344 ASSERT(!m_string.isNull()); | |
| 345 int port = m_string.is8Bit() ? | |
| 346 url_parse::ParsePort(asURLChar8Subtle(m_string), m_parsed.port) : | |
| 347 url_parse::ParsePort(m_string.characters16(), m_parsed.port); | |
| 348 ASSERT(port != url_parse::PORT_UNSPECIFIED); // Checked port.len <= 0 before
. | |
| 349 | |
| 350 if (port == url_parse::PORT_INVALID || port > maximumValidPortNumber) // Mim
ic KURL::port() | |
| 351 port = invalidPortNumber; | |
| 352 | |
| 353 return static_cast<unsigned short>(port); | |
| 354 } | |
| 355 | |
| 356 String KURL::pass() const | |
| 357 { | |
| 358 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns | |
| 359 // a null string when the password is empty, which we duplicate here. | |
| 360 if (!m_parsed.password.is_nonempty()) | |
| 361 return String(); | |
| 362 return componentString(m_parsed.password); | |
| 363 } | |
| 364 | |
| 365 String KURL::user() const | |
| 366 { | |
| 367 return componentString(m_parsed.username); | |
| 368 } | |
| 369 | |
| 370 String KURL::fragmentIdentifier() const | |
| 371 { | |
| 372 // Empty but present refs ("foo.com/bar#") should result in the empty | |
| 373 // string, which componentString will produce. Nonexistent refs | |
| 374 // should be the null string. | |
| 375 if (!m_parsed.ref.is_valid()) | |
| 376 return String(); | |
| 377 return componentString(m_parsed.ref); | |
| 378 } | |
| 379 | |
| 380 bool KURL::hasFragmentIdentifier() const | |
| 381 { | |
| 382 return m_parsed.ref.len >= 0; | |
| 383 } | |
| 384 | |
| 385 String KURL::baseAsString() const | |
| 386 { | |
| 387 // FIXME: There is probably a more efficient way to do this? | |
| 388 return m_string.left(pathAfterLastSlash()); | |
| 389 } | |
| 390 | |
| 391 String KURL::query() const | |
| 392 { | |
| 393 if (m_parsed.query.len >= 0) | |
| 394 return componentString(m_parsed.query); | |
| 395 | |
| 396 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns | |
| 397 // an empty string when the query is empty rather than a null (not sure | |
| 398 // which is right). | |
| 399 // Returns a null if the query is not specified, instead of empty. | |
| 400 if (m_parsed.query.is_valid()) | |
| 401 return emptyString(); | |
| 402 return String(); | |
| 403 } | |
| 404 | |
| 405 String KURL::path() const | |
| 406 { | |
| 407 return componentString(m_parsed.path); | |
| 408 } | |
| 409 | |
| 410 bool KURL::setProtocol(const String& protocol) | |
| 411 { | |
| 412 // Firefox and IE remove everything after the first ':'. | |
| 413 int separatorPosition = protocol.find(':'); | |
| 414 String newProtocol = protocol.substring(0, separatorPosition); | |
| 415 StringUTF8Adaptor newProtocolUTF8(newProtocol); | |
| 416 | |
| 417 // If KURL is given an invalid scheme, it returns failure without modifying | |
| 418 // the URL at all. This is in contrast to most other setters which modify | |
| 419 // the URL and set "m_isValid." | |
| 420 url_canon::RawCanonOutputT<char> canonProtocol; | |
| 421 url_parse::Component protocolComponent; | |
| 422 if (!url_canon::CanonicalizeScheme(newProtocolUTF8.data(), url_parse::Compon
ent(0, newProtocolUTF8.length()), &canonProtocol, &protocolComponent) | |
| 423 || !protocolComponent.is_nonempty()) | |
| 424 return false; | |
| 425 | |
| 426 url_canon::Replacements<char> replacements; | |
| 427 replacements.SetScheme(charactersOrEmpty(newProtocolUTF8), url_parse::Compon
ent(0, newProtocolUTF8.length())); | |
| 428 replaceComponents(replacements); | |
| 429 | |
| 430 // isValid could be false but we still return true here. This is because | |
| 431 // WebCore or JS scripts can build up a URL by setting individual | |
| 432 // components, and a JS exception is based on the return value of this | |
| 433 // function. We want to throw the exception and stop the script only when | |
| 434 // its trying to set a bad protocol, and not when it maybe just hasn't | |
| 435 // finished building up its final scheme. | |
| 436 return true; | |
| 437 } | |
| 438 | |
| 439 void KURL::setHost(const String& host) | |
| 440 { | |
| 441 StringUTF8Adaptor hostUTF8(host); | |
| 442 url_canon::Replacements<char> replacements; | |
| 443 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0, ho
stUTF8.length())); | |
| 444 replaceComponents(replacements); | |
| 445 } | |
| 446 | |
| 447 static String parsePortFromStringPosition(const String& value, unsigned portStar
t) | |
| 448 { | |
| 449 // "008080junk" needs to be treated as port "8080" and "000" as "0". | |
| 450 size_t length = value.length(); | |
| 451 unsigned portEnd = portStart; | |
| 452 while (isASCIIDigit(value[portEnd]) && portEnd < length) | |
| 453 ++portEnd; | |
| 454 while (value[portStart] == '0' && portStart < portEnd - 1) | |
| 455 ++portStart; | |
| 456 | |
| 457 // Required for backwards compat. | |
| 458 // https://www.w3.org/Bugs/Public/show_bug.cgi?id=23463 | |
| 459 if (portStart == portEnd) | |
| 460 return "0"; | |
| 461 | |
| 462 return value.substring(portStart, portEnd - portStart); | |
| 463 } | |
| 464 | |
| 465 void KURL::setHostAndPort(const String& hostAndPort) | |
| 466 { | |
| 467 size_t separator = hostAndPort.find(':'); | |
| 468 if (!separator) | |
| 469 return; | |
| 470 | |
| 471 if (separator == kNotFound) { | |
| 472 url_canon::Replacements<char> replacements; | |
| 473 StringUTF8Adaptor hostUTF8(hostAndPort); | |
| 474 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0
, hostUTF8.length())); | |
| 475 replaceComponents(replacements); | |
| 476 return; | |
| 477 } | |
| 478 | |
| 479 String host = hostAndPort.substring(0, separator); | |
| 480 String port = parsePortFromStringPosition(hostAndPort, separator + 1); | |
| 481 | |
| 482 StringUTF8Adaptor hostUTF8(host); | |
| 483 StringUTF8Adaptor portUTF8(port); | |
| 484 | |
| 485 url_canon::Replacements<char> replacements; | |
| 486 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0, ho
stUTF8.length())); | |
| 487 replacements.SetPort(charactersOrEmpty(portUTF8), url_parse::Component(0, po
rtUTF8.length())); | |
| 488 replaceComponents(replacements); | |
| 489 } | |
| 490 | |
| 491 void KURL::removePort() | |
| 492 { | |
| 493 if (!hasPort()) | |
| 494 return; | |
| 495 url_canon::Replacements<char> replacements; | |
| 496 replacements.ClearPort(); | |
| 497 replaceComponents(replacements); | |
| 498 } | |
| 499 | |
| 500 void KURL::setPort(const String& port) | |
| 501 { | |
| 502 String parsedPort = parsePortFromStringPosition(port, 0); | |
| 503 setPort(parsedPort.toUInt()); | |
| 504 } | |
| 505 | |
| 506 void KURL::setPort(unsigned short port) | |
| 507 { | |
| 508 if (isDefaultPortForProtocol(port, protocol())) { | |
| 509 removePort(); | |
| 510 return; | |
| 511 } | |
| 512 | |
| 513 String portString = String::number(port); | |
| 514 ASSERT(portString.is8Bit()); | |
| 515 | |
| 516 url_canon::Replacements<char> replacements; | |
| 517 replacements.SetPort(reinterpret_cast<const char*>(portString.characters8())
, url_parse::Component(0, portString.length())); | |
| 518 replaceComponents(replacements); | |
| 519 } | |
| 520 | |
| 521 void KURL::setUser(const String& user) | |
| 522 { | |
| 523 // This function is commonly called to clear the username, which we | |
| 524 // normally don't have, so we optimize this case. | |
| 525 if (user.isEmpty() && !m_parsed.username.is_valid()) | |
| 526 return; | |
| 527 | |
| 528 // The canonicalizer will clear any usernames that are empty, so we | |
| 529 // don't have to explicitly call ClearUsername() here. | |
| 530 StringUTF8Adaptor userUTF8(user); | |
| 531 url_canon::Replacements<char> replacements; | |
| 532 replacements.SetUsername(charactersOrEmpty(userUTF8), url_parse::Component(0
, userUTF8.length())); | |
| 533 replaceComponents(replacements); | |
| 534 } | |
| 535 | |
| 536 void KURL::setPass(const String& pass) | |
| 537 { | |
| 538 // This function is commonly called to clear the password, which we | |
| 539 // normally don't have, so we optimize this case. | |
| 540 if (pass.isEmpty() && !m_parsed.password.is_valid()) | |
| 541 return; | |
| 542 | |
| 543 // The canonicalizer will clear any passwords that are empty, so we | |
| 544 // don't have to explicitly call ClearUsername() here. | |
| 545 StringUTF8Adaptor passUTF8(pass); | |
| 546 url_canon::Replacements<char> replacements; | |
| 547 replacements.SetPassword(charactersOrEmpty(passUTF8), url_parse::Component(0
, passUTF8.length())); | |
| 548 replaceComponents(replacements); | |
| 549 } | |
| 550 | |
| 551 void KURL::setFragmentIdentifier(const String& fragment) | |
| 552 { | |
| 553 // This function is commonly called to clear the ref, which we | |
| 554 // normally don't have, so we optimize this case. | |
| 555 if (fragment.isNull() && !m_parsed.ref.is_valid()) | |
| 556 return; | |
| 557 | |
| 558 StringUTF8Adaptor fragmentUTF8(fragment); | |
| 559 | |
| 560 url_canon::Replacements<char> replacements; | |
| 561 if (fragment.isNull()) | |
| 562 replacements.ClearRef(); | |
| 563 else | |
| 564 replacements.SetRef(charactersOrEmpty(fragmentUTF8), url_parse::Componen
t(0, fragmentUTF8.length())); | |
| 565 replaceComponents(replacements); | |
| 566 } | |
| 567 | |
| 568 void KURL::removeFragmentIdentifier() | |
| 569 { | |
| 570 url_canon::Replacements<char> replacements; | |
| 571 replacements.ClearRef(); | |
| 572 replaceComponents(replacements); | |
| 573 } | |
| 574 | |
| 575 void KURL::setQuery(const String& query) | |
| 576 { | |
| 577 StringUTF8Adaptor queryUTF8(query); | |
| 578 url_canon::Replacements<char> replacements; | |
| 579 if (query.isNull()) { | |
| 580 // KURL.cpp sets to null to clear any query. | |
| 581 replacements.ClearQuery(); | |
| 582 } else if (query.length() > 0 && query[0] == '?') { | |
| 583 // WebCore expects the query string to begin with a question mark, but | |
| 584 // GoogleURL doesn't. So we trim off the question mark when setting. | |
| 585 replacements.SetQuery(charactersOrEmpty(queryUTF8), url_parse::Component
(1, queryUTF8.length() - 1)); | |
| 586 } else { | |
| 587 // When set with the empty string or something that doesn't begin with | |
| 588 // a question mark, KURL.cpp will add a question mark for you. The only | |
| 589 // way this isn't compatible is if you call this function with an empty | |
| 590 // string. KURL.cpp will leave a '?' with nothing following it in the | |
| 591 // URL, whereas we'll clear it. | |
| 592 // FIXME We should eliminate this difference. | |
| 593 replacements.SetQuery(charactersOrEmpty(queryUTF8), url_parse::Component
(0, queryUTF8.length())); | |
| 594 } | |
| 595 replaceComponents(replacements); | |
| 596 } | |
| 597 | |
| 598 void KURL::setPath(const String& path) | |
| 599 { | |
| 600 // Empty paths will be canonicalized to "/", so we don't have to worry | |
| 601 // about calling ClearPath(). | |
| 602 StringUTF8Adaptor pathUTF8(path); | |
| 603 url_canon::Replacements<char> replacements; | |
| 604 replacements.SetPath(charactersOrEmpty(pathUTF8), url_parse::Component(0, pa
thUTF8.length())); | |
| 605 replaceComponents(replacements); | |
| 606 } | |
| 607 | |
| 608 String decodeURLEscapeSequences(const String& string) | |
| 609 { | |
| 610 return decodeURLEscapeSequences(string, UTF8Encoding()); | |
| 611 } | |
| 612 | |
| 613 // In KURL.cpp's implementation, this is called by every component getter. | |
| 614 // It will unescape every character, including '\0'. This is scary, and may | |
| 615 // cause security holes. We never call this function for components, and | |
| 616 // just return the ASCII versions instead. | |
| 617 // | |
| 618 // This function is also used to decode javascript: URLs and as a general | |
| 619 // purpose unescaping function. | |
| 620 // | |
| 621 // FIXME These should be merged to the KURL.cpp implementation. | |
| 622 String decodeURLEscapeSequences(const String& string, const WTF::TextEncoding& e
ncoding) | |
| 623 { | |
| 624 // FIXME We can probably use KURL.cpp's version of this function | |
| 625 // without modification. However, I'm concerned about | |
| 626 // https://bugs.webkit.org/show_bug.cgi?id=20559 so am keeping this old | |
| 627 // custom code for now. Using their version will also fix the bug that | |
| 628 // we ignore the encoding. | |
| 629 // | |
| 630 // FIXME b/1350291: This does not get called very often. We just convert | |
| 631 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of | |
| 632 // sucks, and we don't use the encoding properly, which will make some | |
| 633 // obscure anchor navigations fail. | |
| 634 StringUTF8Adaptor stringUTF8(string); | |
| 635 url_canon::RawCanonOutputT<url_parse::UTF16Char> unescaped; | |
| 636 url_util::DecodeURLEscapeSequences(stringUTF8.data(), stringUTF8.length(), &
unescaped); | |
| 637 return StringImpl::create8BitIfPossible(reinterpret_cast<UChar*>(unescaped.d
ata()), unescaped.length()); | |
| 638 } | |
| 639 | |
| 640 String encodeWithURLEscapeSequences(const String& notEncodedString) | |
| 641 { | |
| 642 CString utf8 = UTF8Encoding().normalizeAndEncode(notEncodedString, WTF::URLE
ncodedEntitiesForUnencodables); | |
| 643 | |
| 644 url_canon::RawCanonOutputT<char> buffer; | |
| 645 int inputLength = utf8.length(); | |
| 646 if (buffer.length() < inputLength * 3) | |
| 647 buffer.Resize(inputLength * 3); | |
| 648 | |
| 649 url_util::EncodeURIComponent(utf8.data(), inputLength, &buffer); | |
| 650 String escaped(buffer.data(), buffer.length()); | |
| 651 // Unescape '/'; it's safe and much prettier. | |
| 652 escaped.replace("%2F", "/"); | |
| 653 return escaped; | |
| 654 } | |
| 655 | |
| 656 bool KURL::isHierarchical() const | |
| 657 { | |
| 658 if (m_string.isNull() || !m_parsed.scheme.is_nonempty()) | |
| 659 return false; | |
| 660 return m_string.is8Bit() ? | |
| 661 url_util::IsStandard(asURLChar8Subtle(m_string), m_parsed.scheme) : | |
| 662 url_util::IsStandard(m_string.characters16(), m_parsed.scheme); | |
| 663 } | |
| 664 | |
| 665 #ifndef NDEBUG | |
| 666 void KURL::print() const | |
| 667 { | |
| 668 printf("%s\n", m_string.utf8().data()); | |
| 669 } | |
| 670 #endif | |
| 671 | |
| 672 bool equalIgnoringFragmentIdentifier(const KURL& a, const KURL& b) | |
| 673 { | |
| 674 // Compute the length of each URL without its ref. Note that the reference | |
| 675 // begin (if it exists) points to the character *after* the '#', so we need | |
| 676 // to subtract one. | |
| 677 int aLength = a.m_string.length(); | |
| 678 if (a.m_parsed.ref.len >= 0) | |
| 679 aLength = a.m_parsed.ref.begin - 1; | |
| 680 | |
| 681 int bLength = b.m_string.length(); | |
| 682 if (b.m_parsed.ref.len >= 0) | |
| 683 bLength = b.m_parsed.ref.begin - 1; | |
| 684 | |
| 685 if (aLength != bLength) | |
| 686 return false; | |
| 687 | |
| 688 const String& aString = a.m_string; | |
| 689 const String& bString = b.m_string; | |
| 690 // FIXME: Abstraction this into a function in WTFString.h. | |
| 691 for (int i = 0; i < aLength; ++i) { | |
| 692 if (aString[i] != bString[i]) | |
| 693 return false; | |
| 694 } | |
| 695 return true; | |
| 696 } | |
| 697 | |
| 698 unsigned KURL::hostStart() const | |
| 699 { | |
| 700 return m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false); | |
| 701 } | |
| 702 | |
| 703 unsigned KURL::hostEnd() const | |
| 704 { | |
| 705 return m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true); | |
| 706 } | |
| 707 | |
| 708 unsigned KURL::pathStart() const | |
| 709 { | |
| 710 return m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false); | |
| 711 } | |
| 712 | |
| 713 unsigned KURL::pathEnd() const | |
| 714 { | |
| 715 return m_parsed.CountCharactersBefore(url_parse::Parsed::QUERY, true); | |
| 716 } | |
| 717 | |
| 718 unsigned KURL::pathAfterLastSlash() const | |
| 719 { | |
| 720 if (m_string.isNull()) | |
| 721 return 0; | |
| 722 if (!m_isValid || !m_parsed.path.is_valid()) | |
| 723 return m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false); | |
| 724 url_parse::Component filename; | |
| 725 if (m_string.is8Bit()) | |
| 726 url_parse::ExtractFileName(asURLChar8Subtle(m_string), m_parsed.path, &f
ilename); | |
| 727 else | |
| 728 url_parse::ExtractFileName(m_string.characters16(), m_parsed.path, &file
name); | |
| 729 return filename.begin; | |
| 730 } | |
| 731 | |
| 732 bool protocolIs(const String& url, const char* protocol) | |
| 733 { | |
| 734 assertProtocolIsGood(protocol); | |
| 735 if (url.isNull()) | |
| 736 return false; | |
| 737 if (url.is8Bit()) | |
| 738 return url_util::FindAndCompareScheme(asURLChar8Subtle(url), url.length(
), protocol, 0); | |
| 739 return url_util::FindAndCompareScheme(url.characters16(), url.length(), prot
ocol, 0); | |
| 740 } | |
| 741 | |
| 742 void KURL::init(const KURL& base, const String& relative, const WTF::TextEncodin
g* queryEncoding) | |
| 743 { | |
| 744 if (!relative.isNull() && relative.is8Bit()) { | |
| 745 StringUTF8Adaptor relativeUTF8(relative); | |
| 746 init(base, relativeUTF8.data(), relativeUTF8.length(), queryEncoding); | |
| 747 } else | |
| 748 init(base, relative.characters16(), relative.length(), queryEncoding); | |
| 749 initProtocolIsInHTTPFamily(); | |
| 750 initInnerURL(); | |
| 751 } | |
| 752 | |
| 753 template <typename CHAR> | |
| 754 void KURL::init(const KURL& base, const CHAR* relative, int relativeLength, cons
t WTF::TextEncoding* queryEncoding) | |
| 755 { | |
| 756 // As a performance optimization, we do not use the charset converter | |
| 757 // if encoding is UTF-8 or other Unicode encodings. Note that this is | |
| 758 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more | |
| 759 // efficient with no charset converter object because it can do UTF-8 | |
| 760 // internally with no extra copies. | |
| 761 | |
| 762 // We feel free to make the charset converter object every time since it's | |
| 763 // just a wrapper around a reference. | |
| 764 KURLCharsetConverter charsetConverterObject(queryEncoding); | |
| 765 KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncodin
g(queryEncoding)) ? 0 : &charsetConverterObject; | |
| 766 | |
| 767 StringUTF8Adaptor baseUTF8(base.string()); | |
| 768 | |
| 769 url_canon::RawCanonOutputT<char> output; | |
| 770 m_isValid = url_util::ResolveRelative(baseUTF8.data(), baseUTF8.length(), ba
se.m_parsed, relative, relativeLength, charsetConverter, &output, &m_parsed); | |
| 771 | |
| 772 // See FIXME in KURLPrivate in the header. If canonicalization has not | |
| 773 // changed the string, we can avoid an extra allocation by using assignment. | |
| 774 m_string = AtomicString::fromUTF8(output.data(), output.length()); | |
| 775 } | |
| 776 | |
| 777 void KURL::initInnerURL() | |
| 778 { | |
| 779 if (!m_isValid) { | |
| 780 m_innerURL.clear(); | |
| 781 return; | |
| 782 } | |
| 783 if (url_parse::Parsed* innerParsed = m_parsed.inner_parsed()) | |
| 784 m_innerURL = adoptPtr(new KURL(ParsedURLString, m_string.substring(inner
Parsed->scheme.begin, innerParsed->Length() - innerParsed->scheme.begin))); | |
| 785 else | |
| 786 m_innerURL.clear(); | |
| 787 } | |
| 788 | |
| 789 template<typename CHAR> | |
| 790 bool internalProtocolIs(const url_parse::Component& scheme, const CHAR* spec, co
nst char* protocol) | |
| 791 { | |
| 792 const CHAR* begin = spec + scheme.begin; | |
| 793 const CHAR* end = begin + scheme.len; | |
| 794 | |
| 795 while (begin != end && *protocol) { | |
| 796 ASSERT(toASCIILower(*protocol) == *protocol); | |
| 797 if (toASCIILower(*begin++) != *protocol++) | |
| 798 return false; | |
| 799 } | |
| 800 | |
| 801 // Both strings are equal (ignoring case) if and only if all of the characte
rs were equal, | |
| 802 // and the end of both has been reached. | |
| 803 return begin == end && !*protocol; | |
| 804 } | |
| 805 | |
| 806 template<typename CHAR> | |
| 807 bool checkIfProtocolIsInHTTPFamily(const url_parse::Component& scheme, const CHA
R* spec) | |
| 808 { | |
| 809 if (scheme.len == 4) | |
| 810 return internalProtocolIs(scheme, spec, "http"); | |
| 811 if (scheme.len == 5) | |
| 812 return internalProtocolIs(scheme, spec, "https"); | |
| 813 return false; | |
| 814 } | |
| 815 | |
| 816 void KURL::initProtocolIsInHTTPFamily() | |
| 817 { | |
| 818 if (!m_isValid) { | |
| 819 m_protocolIsInHTTPFamily = false; | |
| 820 return; | |
| 821 } | |
| 822 | |
| 823 ASSERT(!m_string.isNull()); | |
| 824 m_protocolIsInHTTPFamily = m_string.is8Bit() ? | |
| 825 checkIfProtocolIsInHTTPFamily(m_parsed.scheme, m_string.characters8()) : | |
| 826 checkIfProtocolIsInHTTPFamily(m_parsed.scheme, m_string.characters16()); | |
| 827 } | |
| 828 | |
| 829 bool KURL::protocolIs(const char* protocol) const | |
| 830 { | |
| 831 assertProtocolIsGood(protocol); | |
| 832 | |
| 833 // JavaScript URLs are "valid" and should be executed even if KURL decides t
hey are invalid. | |
| 834 // The free function protocolIsJavaScript() should be used instead. | |
| 835 // FIXME: Chromium code needs to be fixed for this assert to be enabled. ASS
ERT(strcmp(protocol, "javascript")); | |
| 836 | |
| 837 if (m_string.isNull() || m_parsed.scheme.len <= 0) | |
| 838 return *protocol == '\0'; | |
| 839 | |
| 840 return m_string.is8Bit() ? | |
| 841 internalProtocolIs(m_parsed.scheme, m_string.characters8(), protocol) : | |
| 842 internalProtocolIs(m_parsed.scheme, m_string.characters16(), protocol); | |
| 843 } | |
| 844 | |
| 845 String KURL::stringForInvalidComponent() const | |
| 846 { | |
| 847 if (m_string.isNull()) | |
| 848 return String(); | |
| 849 return emptyString(); | |
| 850 } | |
| 851 | |
| 852 String KURL::componentString(const url_parse::Component& component) const | |
| 853 { | |
| 854 if (!m_isValid || component.len <= 0) | |
| 855 return stringForInvalidComponent(); | |
| 856 // begin and len are in terms of bytes which do not match | |
| 857 // if string() is UTF-16 and input contains non-ASCII characters. | |
| 858 // However, the only part in urlString that can contain non-ASCII | |
| 859 // characters is 'ref' at the end of the string. In that case, | |
| 860 // begin will always match the actual value and len (in terms of | |
| 861 // byte) will be longer than what's needed by 'mid'. However, mid | |
| 862 // truncates len to avoid go past the end of a string so that we can | |
| 863 // get away without doing anything here. | |
| 864 return string().substring(component.begin, component.len); | |
| 865 } | |
| 866 | |
| 867 template<typename CHAR> | |
| 868 void KURL::replaceComponents(const url_canon::Replacements<CHAR>& replacements) | |
| 869 { | |
| 870 url_canon::RawCanonOutputT<char> output; | |
| 871 url_parse::Parsed newParsed; | |
| 872 | |
| 873 StringUTF8Adaptor utf8(m_string); | |
| 874 m_isValid = url_util::ReplaceComponents(utf8.data(), utf8.length(), m_parsed
, replacements, 0, &output, &newParsed); | |
| 875 | |
| 876 m_parsed = newParsed; | |
| 877 m_string = AtomicString::fromUTF8(output.data(), output.length()); | |
| 878 } | |
| 879 | |
| 880 bool KURL::isSafeToSendToAnotherThread() const | |
| 881 { | |
| 882 return m_string.isSafeToSendToAnotherThread() | |
| 883 && (!m_innerURL || m_innerURL->isSafeToSendToAnotherThread()); | |
| 884 } | |
| 885 | |
| 886 } // namespace WebCore | |
| OLD | NEW |