Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: Source/weborigin/KURL.cpp

Issue 54053006: Move weborigin/ under platform/ so that it may someday call platform APIs (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Stale reference to weboriginexport in .gypi Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Source/weborigin/KURL.h ('k') | Source/weborigin/KURLHash.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2004, 2007, 2008, 2011, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2012 Research In Motion Limited. All rights reserved.
4 * Copyright (C) 2008, 2009, 2011 Google Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29 #include "weborigin/KURL.h"
30
31 #include "weborigin/KnownPorts.h"
32 #include "wtf/HashMap.h"
33 #include "wtf/StdLibExtras.h"
34 #include "wtf/text/CString.h"
35 #include "wtf/text/StringHash.h"
36 #include "wtf/text/StringUTF8Adaptor.h"
37 #include "wtf/text/TextEncoding.h"
38 #include <algorithm>
39 #include <url/url_util.h>
40 #ifndef NDEBUG
41 #include <stdio.h>
42 #endif
43
44 namespace WebCore {
45
// Sentinel that KURL::port() reports for a port that failed to parse or that
// exceeds maximumValidPortNumber.
static const int invalidPortNumber = 0xFFFF;
// Largest port value that KURL::port() reports unchanged.
static const int maximumValidPortNumber = 0xFFFE;
48
49 static void assertProtocolIsGood(const char* protocol)
50 {
51 #ifndef NDEBUG
52 const char* p = protocol;
53 while (*p) {
54 ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
55 ++p;
56 }
57 #endif
58 }
59
60 // Note: You must ensure that |spec| is a valid canonicalized URL before calling this function.
61 static const char* asURLChar8Subtle(const String& spec)
62 {
63 ASSERT(spec.is8Bit());
64 // characters8 really return characters in Latin-1, but because we canonical ize
65 // URL strings, we know that everything before the fragment identifier will
66 // actually be ASCII, which means this cast is safe as long as you don't loo k
67 // at the fragment component.
68 return reinterpret_cast<const char*>(spec.characters8());
69 }
70
71 // Returns the characters for the given string, or a pointer to a static empty
72 // string if the input string is null. This will always ensure we have a non-
73 // null character pointer since ReplaceComponents has special meaning for null.
74 static const char* charactersOrEmpty(const StringUTF8Adaptor& string)
75 {
76 static const char zero = 0;
77 return string.data() ? string.data() : &zero;
78 }
79
// Returns true when |c| is an ASCII letter (either case), i.e. a character
// allowed to start a URL scheme.
static bool isSchemeFirstChar(char c)
{
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
84
85 static bool isSchemeChar(char c)
86 {
87 return isSchemeFirstChar(c) || (c >= '0' && c <= '9') || c == '.' || c == '- ' || c == '+';
88 }
89
90 static bool isUnicodeEncoding(const WTF::TextEncoding* encoding)
91 {
92 return encoding->encodingForFormSubmission() == UTF8Encoding();
93 }
94
95 namespace {
96
97 class KURLCharsetConverter : public url_canon::CharsetConverter {
98 public:
99 // The encoding parameter may be 0, but in this case the object must not be called.
100 explicit KURLCharsetConverter(const WTF::TextEncoding* encoding)
101 : m_encoding(encoding)
102 {
103 }
104
105 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLe ngth, url_canon::CanonOutput* output)
106 {
107 CString encoded = m_encoding->normalizeAndEncode(String(input, inputLeng th), WTF::URLEncodedEntitiesForUnencodables);
108 output->Append(encoded.data(), static_cast<int>(encoded.length()));
109 }
110
111 private:
112 const WTF::TextEncoding* m_encoding;
113 };
114
115 } // namespace
116
117 bool isValidProtocol(const String& protocol)
118 {
119 // RFC3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
120 if (protocol.isEmpty())
121 return false;
122 if (!isSchemeFirstChar(protocol[0]))
123 return false;
124 unsigned protocolLength = protocol.length();
125 for (unsigned i = 1; i < protocolLength; i++) {
126 if (!isSchemeChar(protocol[i]))
127 return false;
128 }
129 return true;
130 }
131
132 String KURL::strippedForUseAsReferrer() const
133 {
134 KURL referrer(*this);
135 referrer.setUser(String());
136 referrer.setPass(String());
137 referrer.removeFragmentIdentifier();
138 return referrer.string();
139 }
140
141 bool KURL::isLocalFile() const
142 {
143 // Including feed here might be a bad idea since drag and drop uses this che ck
144 // and including feed would allow feeds to potentially let someone's blog
145 // read the contents of the clipboard on a drag, even without a drop.
146 // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function.
147 return protocolIs("file");
148 }
149
150 bool protocolIsJavaScript(const String& url)
151 {
152 return protocolIs(url, "javascript");
153 }
154
155 const KURL& blankURL()
156 {
157 DEFINE_STATIC_LOCAL(KURL, staticBlankURL, (ParsedURLString, "about:blank"));
158 return staticBlankURL;
159 }
160
161 bool KURL::isBlankURL() const
162 {
163 return protocolIs("about");
164 }
165
166 String KURL::elidedString() const
167 {
168 if (string().length() <= 1024)
169 return string();
170
171 return string().left(511) + "..." + string().right(510);
172 }
173
174 // Initializes with a string representing an absolute URL. No encoding
175 // information is specified. This generally happens when a KURL is converted
176 // to a string and then converted back. In this case, the URL is already
177 // canonical and in proper escaped form so needs no encoding. We treat it as
178 // UTF-8 just in case.
179 KURL::KURL(ParsedURLStringTag, const String& url)
180 {
181 if (!url.isNull())
182 init(KURL(), url, 0);
183 else {
184 // WebCore expects us to preserve the nullness of strings when this
185 // constructor is used. In all other cases, it expects a non-null
186 // empty string, which is what init() will create.
187 m_isValid = false;
188 m_protocolIsInHTTPFamily = false;
189 }
190 }
191
192 KURL KURL::createIsolated(ParsedURLStringTag, const String& url)
193 {
194 // FIXME: We should be able to skip this extra copy and created an
195 // isolated KURL more efficiently.
196 return KURL(ParsedURLString, url).copy();
197 }
198
199 // Constructs a new URL given a base URL and a possibly relative input URL.
200 // This assumes UTF-8 encoding.
201 KURL::KURL(const KURL& base, const String& relative)
202 {
203 init(base, relative, 0);
204 }
205
206 // Constructs a new URL given a base URL and a possibly relative input URL.
207 // Any query portion of the relative URL will be encoded in the given encoding.
208 KURL::KURL(const KURL& base, const String& relative, const WTF::TextEncoding& en coding)
209 {
210 init(base, relative, &encoding.encodingForFormSubmission());
211 }
212
213 KURL::KURL(const AtomicString& canonicalString, const url_parse::Parsed& parsed, bool isValid)
214 : m_isValid(isValid)
215 , m_protocolIsInHTTPFamily(false)
216 , m_parsed(parsed)
217 , m_string(canonicalString)
218 {
219 initProtocolIsInHTTPFamily();
220 initInnerURL();
221 }
222
223 KURL::KURL(WTF::HashTableDeletedValueType)
224 : m_isValid(false)
225 , m_protocolIsInHTTPFamily(false)
226 , m_string(WTF::HashTableDeletedValue)
227 {
228 }
229
230 KURL::KURL(const KURL& other)
231 : m_isValid(other.m_isValid)
232 , m_protocolIsInHTTPFamily(other.m_protocolIsInHTTPFamily)
233 , m_parsed(other.m_parsed)
234 , m_string(other.m_string)
235 {
236 if (other.m_innerURL.get())
237 m_innerURL = adoptPtr(new KURL(other.m_innerURL->copy()));
238 }
239
240 KURL& KURL::operator=(const KURL& other)
241 {
242 m_isValid = other.m_isValid;
243 m_protocolIsInHTTPFamily = other.m_protocolIsInHTTPFamily;
244 m_parsed = other.m_parsed;
245 m_string = other.m_string;
246 if (other.m_innerURL)
247 m_innerURL = adoptPtr(new KURL(other.m_innerURL->copy()));
248 else
249 m_innerURL.clear();
250 return *this;
251 }
252
253 KURL KURL::copy() const
254 {
255 KURL result;
256 result.m_isValid = m_isValid;
257 result.m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily;
258 result.m_parsed = m_parsed;
259 result.m_string = m_string.isolatedCopy();
260 if (result.m_innerURL)
261 result.m_innerURL = adoptPtr(new KURL(m_innerURL->copy()));
262 return result;
263 }
264
265 bool KURL::isNull() const
266 {
267 return m_string.isNull();
268 }
269
270 bool KURL::isEmpty() const
271 {
272 return m_string.isEmpty();
273 }
274
275 bool KURL::isValid() const
276 {
277 return m_isValid;
278 }
279
280 bool KURL::hasPort() const
281 {
282 return hostEnd() < pathStart();
283 }
284
285 bool KURL::protocolIsInHTTPFamily() const
286 {
287 return m_protocolIsInHTTPFamily;
288 }
289
290 bool KURL::hasPath() const
291 {
292 // Note that http://www.google.com/" has a path, the path is "/". This can
293 // return false only for invalid or nonstandard URLs.
294 return m_parsed.path.len >= 0;
295 }
296
297 // We handle "parameters" separated by a semicolon, while KURL.cpp does not,
298 // which can lead to different results in some cases.
299 String KURL::lastPathComponent() const
300 {
301 if (!m_isValid)
302 return stringForInvalidComponent();
303 ASSERT(!m_string.isNull());
304
305 // When the output ends in a slash, WebCore has different expectations than
306 // the GoogleURL library. For "/foo/bar/" the library will return the empty
307 // string, but WebCore wants "bar".
308 url_parse::Component path = m_parsed.path;
309 if (path.len > 0 && m_string[path.end() - 1] == '/')
310 path.len--;
311
312 url_parse::Component file;
313 if (m_string.is8Bit())
314 url_parse::ExtractFileName(asURLChar8Subtle(m_string), path, &file);
315 else
316 url_parse::ExtractFileName(m_string.characters16(), path, &file);
317
318 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
319 // a null string when the path is empty, which we duplicate here.
320 if (!file.is_nonempty())
321 return String();
322 return componentString(file);
323 }
324
325 String KURL::protocol() const
326 {
327 return componentString(m_parsed.scheme);
328 }
329
330 String KURL::host() const
331 {
332 return componentString(m_parsed.host);
333 }
334
335 // Returns 0 when there is no port.
336 //
337 // We treat URL's with out-of-range port numbers as invalid URLs, and they will
338 // be rejected by the canonicalizer. KURL.cpp will allow them in parsing, but
339 // return invalidPortNumber from this port() function, so we mirror that behavio r here.
340 unsigned short KURL::port() const
341 {
342 if (!m_isValid || m_parsed.port.len <= 0)
343 return 0;
344 ASSERT(!m_string.isNull());
345 int port = m_string.is8Bit() ?
346 url_parse::ParsePort(asURLChar8Subtle(m_string), m_parsed.port) :
347 url_parse::ParsePort(m_string.characters16(), m_parsed.port);
348 ASSERT(port != url_parse::PORT_UNSPECIFIED); // Checked port.len <= 0 before .
349
350 if (port == url_parse::PORT_INVALID || port > maximumValidPortNumber) // Mim ic KURL::port()
351 port = invalidPortNumber;
352
353 return static_cast<unsigned short>(port);
354 }
355
356 String KURL::pass() const
357 {
358 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
359 // a null string when the password is empty, which we duplicate here.
360 if (!m_parsed.password.is_nonempty())
361 return String();
362 return componentString(m_parsed.password);
363 }
364
365 String KURL::user() const
366 {
367 return componentString(m_parsed.username);
368 }
369
370 String KURL::fragmentIdentifier() const
371 {
372 // Empty but present refs ("foo.com/bar#") should result in the empty
373 // string, which componentString will produce. Nonexistent refs
374 // should be the null string.
375 if (!m_parsed.ref.is_valid())
376 return String();
377 return componentString(m_parsed.ref);
378 }
379
380 bool KURL::hasFragmentIdentifier() const
381 {
382 return m_parsed.ref.len >= 0;
383 }
384
385 String KURL::baseAsString() const
386 {
387 // FIXME: There is probably a more efficient way to do this?
388 return m_string.left(pathAfterLastSlash());
389 }
390
391 String KURL::query() const
392 {
393 if (m_parsed.query.len >= 0)
394 return componentString(m_parsed.query);
395
396 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
397 // an empty string when the query is empty rather than a null (not sure
398 // which is right).
399 // Returns a null if the query is not specified, instead of empty.
400 if (m_parsed.query.is_valid())
401 return emptyString();
402 return String();
403 }
404
405 String KURL::path() const
406 {
407 return componentString(m_parsed.path);
408 }
409
410 bool KURL::setProtocol(const String& protocol)
411 {
412 // Firefox and IE remove everything after the first ':'.
413 int separatorPosition = protocol.find(':');
414 String newProtocol = protocol.substring(0, separatorPosition);
415 StringUTF8Adaptor newProtocolUTF8(newProtocol);
416
417 // If KURL is given an invalid scheme, it returns failure without modifying
418 // the URL at all. This is in contrast to most other setters which modify
419 // the URL and set "m_isValid."
420 url_canon::RawCanonOutputT<char> canonProtocol;
421 url_parse::Component protocolComponent;
422 if (!url_canon::CanonicalizeScheme(newProtocolUTF8.data(), url_parse::Compon ent(0, newProtocolUTF8.length()), &canonProtocol, &protocolComponent)
423 || !protocolComponent.is_nonempty())
424 return false;
425
426 url_canon::Replacements<char> replacements;
427 replacements.SetScheme(charactersOrEmpty(newProtocolUTF8), url_parse::Compon ent(0, newProtocolUTF8.length()));
428 replaceComponents(replacements);
429
430 // isValid could be false but we still return true here. This is because
431 // WebCore or JS scripts can build up a URL by setting individual
432 // components, and a JS exception is based on the return value of this
433 // function. We want to throw the exception and stop the script only when
434 // its trying to set a bad protocol, and not when it maybe just hasn't
435 // finished building up its final scheme.
436 return true;
437 }
438
439 void KURL::setHost(const String& host)
440 {
441 StringUTF8Adaptor hostUTF8(host);
442 url_canon::Replacements<char> replacements;
443 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0, ho stUTF8.length()));
444 replaceComponents(replacements);
445 }
446
447 static String parsePortFromStringPosition(const String& value, unsigned portStar t)
448 {
449 // "008080junk" needs to be treated as port "8080" and "000" as "0".
450 size_t length = value.length();
451 unsigned portEnd = portStart;
452 while (isASCIIDigit(value[portEnd]) && portEnd < length)
453 ++portEnd;
454 while (value[portStart] == '0' && portStart < portEnd - 1)
455 ++portStart;
456
457 // Required for backwards compat.
458 // https://www.w3.org/Bugs/Public/show_bug.cgi?id=23463
459 if (portStart == portEnd)
460 return "0";
461
462 return value.substring(portStart, portEnd - portStart);
463 }
464
465 void KURL::setHostAndPort(const String& hostAndPort)
466 {
467 size_t separator = hostAndPort.find(':');
468 if (!separator)
469 return;
470
471 if (separator == kNotFound) {
472 url_canon::Replacements<char> replacements;
473 StringUTF8Adaptor hostUTF8(hostAndPort);
474 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0 , hostUTF8.length()));
475 replaceComponents(replacements);
476 return;
477 }
478
479 String host = hostAndPort.substring(0, separator);
480 String port = parsePortFromStringPosition(hostAndPort, separator + 1);
481
482 StringUTF8Adaptor hostUTF8(host);
483 StringUTF8Adaptor portUTF8(port);
484
485 url_canon::Replacements<char> replacements;
486 replacements.SetHost(charactersOrEmpty(hostUTF8), url_parse::Component(0, ho stUTF8.length()));
487 replacements.SetPort(charactersOrEmpty(portUTF8), url_parse::Component(0, po rtUTF8.length()));
488 replaceComponents(replacements);
489 }
490
491 void KURL::removePort()
492 {
493 if (!hasPort())
494 return;
495 url_canon::Replacements<char> replacements;
496 replacements.ClearPort();
497 replaceComponents(replacements);
498 }
499
500 void KURL::setPort(const String& port)
501 {
502 String parsedPort = parsePortFromStringPosition(port, 0);
503 setPort(parsedPort.toUInt());
504 }
505
506 void KURL::setPort(unsigned short port)
507 {
508 if (isDefaultPortForProtocol(port, protocol())) {
509 removePort();
510 return;
511 }
512
513 String portString = String::number(port);
514 ASSERT(portString.is8Bit());
515
516 url_canon::Replacements<char> replacements;
517 replacements.SetPort(reinterpret_cast<const char*>(portString.characters8()) , url_parse::Component(0, portString.length()));
518 replaceComponents(replacements);
519 }
520
521 void KURL::setUser(const String& user)
522 {
523 // This function is commonly called to clear the username, which we
524 // normally don't have, so we optimize this case.
525 if (user.isEmpty() && !m_parsed.username.is_valid())
526 return;
527
528 // The canonicalizer will clear any usernames that are empty, so we
529 // don't have to explicitly call ClearUsername() here.
530 StringUTF8Adaptor userUTF8(user);
531 url_canon::Replacements<char> replacements;
532 replacements.SetUsername(charactersOrEmpty(userUTF8), url_parse::Component(0 , userUTF8.length()));
533 replaceComponents(replacements);
534 }
535
536 void KURL::setPass(const String& pass)
537 {
538 // This function is commonly called to clear the password, which we
539 // normally don't have, so we optimize this case.
540 if (pass.isEmpty() && !m_parsed.password.is_valid())
541 return;
542
543 // The canonicalizer will clear any passwords that are empty, so we
544 // don't have to explicitly call ClearUsername() here.
545 StringUTF8Adaptor passUTF8(pass);
546 url_canon::Replacements<char> replacements;
547 replacements.SetPassword(charactersOrEmpty(passUTF8), url_parse::Component(0 , passUTF8.length()));
548 replaceComponents(replacements);
549 }
550
551 void KURL::setFragmentIdentifier(const String& fragment)
552 {
553 // This function is commonly called to clear the ref, which we
554 // normally don't have, so we optimize this case.
555 if (fragment.isNull() && !m_parsed.ref.is_valid())
556 return;
557
558 StringUTF8Adaptor fragmentUTF8(fragment);
559
560 url_canon::Replacements<char> replacements;
561 if (fragment.isNull())
562 replacements.ClearRef();
563 else
564 replacements.SetRef(charactersOrEmpty(fragmentUTF8), url_parse::Componen t(0, fragmentUTF8.length()));
565 replaceComponents(replacements);
566 }
567
568 void KURL::removeFragmentIdentifier()
569 {
570 url_canon::Replacements<char> replacements;
571 replacements.ClearRef();
572 replaceComponents(replacements);
573 }
574
575 void KURL::setQuery(const String& query)
576 {
577 StringUTF8Adaptor queryUTF8(query);
578 url_canon::Replacements<char> replacements;
579 if (query.isNull()) {
580 // KURL.cpp sets to null to clear any query.
581 replacements.ClearQuery();
582 } else if (query.length() > 0 && query[0] == '?') {
583 // WebCore expects the query string to begin with a question mark, but
584 // GoogleURL doesn't. So we trim off the question mark when setting.
585 replacements.SetQuery(charactersOrEmpty(queryUTF8), url_parse::Component (1, queryUTF8.length() - 1));
586 } else {
587 // When set with the empty string or something that doesn't begin with
588 // a question mark, KURL.cpp will add a question mark for you. The only
589 // way this isn't compatible is if you call this function with an empty
590 // string. KURL.cpp will leave a '?' with nothing following it in the
591 // URL, whereas we'll clear it.
592 // FIXME We should eliminate this difference.
593 replacements.SetQuery(charactersOrEmpty(queryUTF8), url_parse::Component (0, queryUTF8.length()));
594 }
595 replaceComponents(replacements);
596 }
597
598 void KURL::setPath(const String& path)
599 {
600 // Empty paths will be canonicalized to "/", so we don't have to worry
601 // about calling ClearPath().
602 StringUTF8Adaptor pathUTF8(path);
603 url_canon::Replacements<char> replacements;
604 replacements.SetPath(charactersOrEmpty(pathUTF8), url_parse::Component(0, pa thUTF8.length()));
605 replaceComponents(replacements);
606 }
607
608 String decodeURLEscapeSequences(const String& string)
609 {
610 return decodeURLEscapeSequences(string, UTF8Encoding());
611 }
612
613 // In KURL.cpp's implementation, this is called by every component getter.
614 // It will unescape every character, including '\0'. This is scary, and may
615 // cause security holes. We never call this function for components, and
616 // just return the ASCII versions instead.
617 //
618 // This function is also used to decode javascript: URLs and as a general
619 // purpose unescaping function.
620 //
621 // FIXME These should be merged to the KURL.cpp implementation.
622 String decodeURLEscapeSequences(const String& string, const WTF::TextEncoding& e ncoding)
623 {
624 // FIXME We can probably use KURL.cpp's version of this function
625 // without modification. However, I'm concerned about
626 // https://bugs.webkit.org/show_bug.cgi?id=20559 so am keeping this old
627 // custom code for now. Using their version will also fix the bug that
628 // we ignore the encoding.
629 //
630 // FIXME b/1350291: This does not get called very often. We just convert
631 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of
632 // sucks, and we don't use the encoding properly, which will make some
633 // obscure anchor navigations fail.
634 StringUTF8Adaptor stringUTF8(string);
635 url_canon::RawCanonOutputT<url_parse::UTF16Char> unescaped;
636 url_util::DecodeURLEscapeSequences(stringUTF8.data(), stringUTF8.length(), & unescaped);
637 return StringImpl::create8BitIfPossible(reinterpret_cast<UChar*>(unescaped.d ata()), unescaped.length());
638 }
639
640 String encodeWithURLEscapeSequences(const String& notEncodedString)
641 {
642 CString utf8 = UTF8Encoding().normalizeAndEncode(notEncodedString, WTF::URLE ncodedEntitiesForUnencodables);
643
644 url_canon::RawCanonOutputT<char> buffer;
645 int inputLength = utf8.length();
646 if (buffer.length() < inputLength * 3)
647 buffer.Resize(inputLength * 3);
648
649 url_util::EncodeURIComponent(utf8.data(), inputLength, &buffer);
650 String escaped(buffer.data(), buffer.length());
651 // Unescape '/'; it's safe and much prettier.
652 escaped.replace("%2F", "/");
653 return escaped;
654 }
655
656 bool KURL::isHierarchical() const
657 {
658 if (m_string.isNull() || !m_parsed.scheme.is_nonempty())
659 return false;
660 return m_string.is8Bit() ?
661 url_util::IsStandard(asURLChar8Subtle(m_string), m_parsed.scheme) :
662 url_util::IsStandard(m_string.characters16(), m_parsed.scheme);
663 }
664
665 #ifndef NDEBUG
666 void KURL::print() const
667 {
668 printf("%s\n", m_string.utf8().data());
669 }
670 #endif
671
672 bool equalIgnoringFragmentIdentifier(const KURL& a, const KURL& b)
673 {
674 // Compute the length of each URL without its ref. Note that the reference
675 // begin (if it exists) points to the character *after* the '#', so we need
676 // to subtract one.
677 int aLength = a.m_string.length();
678 if (a.m_parsed.ref.len >= 0)
679 aLength = a.m_parsed.ref.begin - 1;
680
681 int bLength = b.m_string.length();
682 if (b.m_parsed.ref.len >= 0)
683 bLength = b.m_parsed.ref.begin - 1;
684
685 if (aLength != bLength)
686 return false;
687
688 const String& aString = a.m_string;
689 const String& bString = b.m_string;
690 // FIXME: Abstraction this into a function in WTFString.h.
691 for (int i = 0; i < aLength; ++i) {
692 if (aString[i] != bString[i])
693 return false;
694 }
695 return true;
696 }
697
698 unsigned KURL::hostStart() const
699 {
700 return m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false);
701 }
702
703 unsigned KURL::hostEnd() const
704 {
705 return m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true);
706 }
707
708 unsigned KURL::pathStart() const
709 {
710 return m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);
711 }
712
713 unsigned KURL::pathEnd() const
714 {
715 return m_parsed.CountCharactersBefore(url_parse::Parsed::QUERY, true);
716 }
717
718 unsigned KURL::pathAfterLastSlash() const
719 {
720 if (m_string.isNull())
721 return 0;
722 if (!m_isValid || !m_parsed.path.is_valid())
723 return m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);
724 url_parse::Component filename;
725 if (m_string.is8Bit())
726 url_parse::ExtractFileName(asURLChar8Subtle(m_string), m_parsed.path, &f ilename);
727 else
728 url_parse::ExtractFileName(m_string.characters16(), m_parsed.path, &file name);
729 return filename.begin;
730 }
731
732 bool protocolIs(const String& url, const char* protocol)
733 {
734 assertProtocolIsGood(protocol);
735 if (url.isNull())
736 return false;
737 if (url.is8Bit())
738 return url_util::FindAndCompareScheme(asURLChar8Subtle(url), url.length( ), protocol, 0);
739 return url_util::FindAndCompareScheme(url.characters16(), url.length(), prot ocol, 0);
740 }
741
742 void KURL::init(const KURL& base, const String& relative, const WTF::TextEncodin g* queryEncoding)
743 {
744 if (!relative.isNull() && relative.is8Bit()) {
745 StringUTF8Adaptor relativeUTF8(relative);
746 init(base, relativeUTF8.data(), relativeUTF8.length(), queryEncoding);
747 } else
748 init(base, relative.characters16(), relative.length(), queryEncoding);
749 initProtocolIsInHTTPFamily();
750 initInnerURL();
751 }
752
753 template <typename CHAR>
754 void KURL::init(const KURL& base, const CHAR* relative, int relativeLength, cons t WTF::TextEncoding* queryEncoding)
755 {
756 // As a performance optimization, we do not use the charset converter
757 // if encoding is UTF-8 or other Unicode encodings. Note that this is
758 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more
759 // efficient with no charset converter object because it can do UTF-8
760 // internally with no extra copies.
761
762 // We feel free to make the charset converter object every time since it's
763 // just a wrapper around a reference.
764 KURLCharsetConverter charsetConverterObject(queryEncoding);
765 KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncodin g(queryEncoding)) ? 0 : &charsetConverterObject;
766
767 StringUTF8Adaptor baseUTF8(base.string());
768
769 url_canon::RawCanonOutputT<char> output;
770 m_isValid = url_util::ResolveRelative(baseUTF8.data(), baseUTF8.length(), ba se.m_parsed, relative, relativeLength, charsetConverter, &output, &m_parsed);
771
772 // See FIXME in KURLPrivate in the header. If canonicalization has not
773 // changed the string, we can avoid an extra allocation by using assignment.
774 m_string = AtomicString::fromUTF8(output.data(), output.length());
775 }
776
777 void KURL::initInnerURL()
778 {
779 if (!m_isValid) {
780 m_innerURL.clear();
781 return;
782 }
783 if (url_parse::Parsed* innerParsed = m_parsed.inner_parsed())
784 m_innerURL = adoptPtr(new KURL(ParsedURLString, m_string.substring(inner Parsed->scheme.begin, innerParsed->Length() - innerParsed->scheme.begin)));
785 else
786 m_innerURL.clear();
787 }
788
789 template<typename CHAR>
790 bool internalProtocolIs(const url_parse::Component& scheme, const CHAR* spec, co nst char* protocol)
791 {
792 const CHAR* begin = spec + scheme.begin;
793 const CHAR* end = begin + scheme.len;
794
795 while (begin != end && *protocol) {
796 ASSERT(toASCIILower(*protocol) == *protocol);
797 if (toASCIILower(*begin++) != *protocol++)
798 return false;
799 }
800
801 // Both strings are equal (ignoring case) if and only if all of the characte rs were equal,
802 // and the end of both has been reached.
803 return begin == end && !*protocol;
804 }
805
806 template<typename CHAR>
807 bool checkIfProtocolIsInHTTPFamily(const url_parse::Component& scheme, const CHA R* spec)
808 {
809 if (scheme.len == 4)
810 return internalProtocolIs(scheme, spec, "http");
811 if (scheme.len == 5)
812 return internalProtocolIs(scheme, spec, "https");
813 return false;
814 }
815
816 void KURL::initProtocolIsInHTTPFamily()
817 {
818 if (!m_isValid) {
819 m_protocolIsInHTTPFamily = false;
820 return;
821 }
822
823 ASSERT(!m_string.isNull());
824 m_protocolIsInHTTPFamily = m_string.is8Bit() ?
825 checkIfProtocolIsInHTTPFamily(m_parsed.scheme, m_string.characters8()) :
826 checkIfProtocolIsInHTTPFamily(m_parsed.scheme, m_string.characters16());
827 }
828
829 bool KURL::protocolIs(const char* protocol) const
830 {
831 assertProtocolIsGood(protocol);
832
833 // JavaScript URLs are "valid" and should be executed even if KURL decides t hey are invalid.
834 // The free function protocolIsJavaScript() should be used instead.
835 // FIXME: Chromium code needs to be fixed for this assert to be enabled. ASS ERT(strcmp(protocol, "javascript"));
836
837 if (m_string.isNull() || m_parsed.scheme.len <= 0)
838 return *protocol == '\0';
839
840 return m_string.is8Bit() ?
841 internalProtocolIs(m_parsed.scheme, m_string.characters8(), protocol) :
842 internalProtocolIs(m_parsed.scheme, m_string.characters16(), protocol);
843 }
844
845 String KURL::stringForInvalidComponent() const
846 {
847 if (m_string.isNull())
848 return String();
849 return emptyString();
850 }
851
852 String KURL::componentString(const url_parse::Component& component) const
853 {
854 if (!m_isValid || component.len <= 0)
855 return stringForInvalidComponent();
856 // begin and len are in terms of bytes which do not match
857 // if string() is UTF-16 and input contains non-ASCII characters.
858 // However, the only part in urlString that can contain non-ASCII
859 // characters is 'ref' at the end of the string. In that case,
860 // begin will always match the actual value and len (in terms of
861 // byte) will be longer than what's needed by 'mid'. However, mid
862 // truncates len to avoid go past the end of a string so that we can
863 // get away without doing anything here.
864 return string().substring(component.begin, component.len);
865 }
866
867 template<typename CHAR>
868 void KURL::replaceComponents(const url_canon::Replacements<CHAR>& replacements)
869 {
870 url_canon::RawCanonOutputT<char> output;
871 url_parse::Parsed newParsed;
872
873 StringUTF8Adaptor utf8(m_string);
874 m_isValid = url_util::ReplaceComponents(utf8.data(), utf8.length(), m_parsed , replacements, 0, &output, &newParsed);
875
876 m_parsed = newParsed;
877 m_string = AtomicString::fromUTF8(output.data(), output.length());
878 }
879
880 bool KURL::isSafeToSendToAnotherThread() const
881 {
882 return m_string.isSafeToSendToAnotherThread()
883 && (!m_innerURL || m_innerURL->isSafeToSendToAnotherThread());
884 }
885
886 } // namespace WebCore
OLDNEW
« no previous file with comments | « Source/weborigin/KURL.h ('k') | Source/weborigin/KURLHash.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698