| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2008, 2009, Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| 11 * copyright notice, this list of conditions and the following disclaimer | 11 * copyright notice, this list of conditions and the following disclaimer |
| 12 * in the documentation and/or other materials provided with the | 12 * in the documentation and/or other materials provided with the |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 50 // canonicalizer. | 50 // canonicalizer. |
| 51 class KURLCharsetConverter : public url_canon::CharsetConverter { | 51 class KURLCharsetConverter : public url_canon::CharsetConverter { |
| 52 public: | 52 public: |
| 53 // The encoding parameter may be NULL, but in this case the object must not | 53 // The encoding parameter may be NULL, but in this case the object must not |
| 54 // be called. | 54 // be called. |
| 55 KURLCharsetConverter(const TextEncoding* encoding) | 55 KURLCharsetConverter(const TextEncoding* encoding) |
| 56 : m_encoding(encoding) | 56 : m_encoding(encoding) |
| 57 { | 57 { |
| 58 } | 58 } |
| 59 | 59 |
| 60 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int input_l
en, | 60 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLe
ngth, |
| 61 url_canon::CanonOutput* output) | 61 url_canon::CanonOutput* output) |
| 62 { | 62 { |
| 63 CString encoded = m_encoding->encode(input, input_len, URLEncodedEntitie
sForUnencodables); | 63 CString encoded = m_encoding->encode(input, inputLength, URLEncodedEntit
iesForUnencodables); |
| 64 output->Append(encoded.data(), static_cast<int>(encoded.length())); | 64 output->Append(encoded.data(), static_cast<int>(encoded.length())); |
| 65 } | 65 } |
| 66 | 66 |
| 67 private: | 67 private: |
| 68 const TextEncoding* m_encoding; | 68 const TextEncoding* m_encoding; |
| 69 }; | 69 }; |
| 70 | 70 |
| 71 // Note that this function must be named differently than the one in KURL.cpp | 71 // Note that this function must be named differently than the one in KURL.cpp |
| 72 // since our unit tests evilly include both files, and their local definition | 72 // since our unit tests evilly include both files, and their local definition |
| 73 // will be ambiguous. | 73 // will be ambiguous. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 93 &zero; | 93 &zero; |
| 94 } | 94 } |
| 95 | 95 |
| 96 static inline bool isUnicodeEncoding(const TextEncoding* encoding) | 96 static inline bool isUnicodeEncoding(const TextEncoding* encoding) |
| 97 { | 97 { |
| 98 return encoding->encodingForFormSubmission() == UTF8Encoding(); | 98 return encoding->encodingForFormSubmission() == UTF8Encoding(); |
| 99 } | 99 } |
| 100 | 100 |
| 101 static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char*
str) | 101 static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char*
str) |
| 102 { | 102 { |
| 103 while (begin != end) { | 103 while (begin != end && *str) { |
| 104 if (!*str) | |
| 105 return false; | |
| 106 ASSERT(isASCIILower(*str)); | 104 ASSERT(isASCIILower(*str)); |
| 107 if (toASCIILower(*begin++) != *str++) | 105 if (toASCIILower(*begin++) != *str++) |
| 108 return false; | 106 return false; |
| 109 } | 107 } |
| 110 return !*str; | 108 |
| 109 // Both strings are equal (ignoring case) if and only if all of the characte
rs were equal, |
| 110 // and the end of both has been reached. |
| 111 return begin == end && !*str; |
| 111 } | 112 } |
| 112 | 113 |
| 113 | 114 |
| 114 // KURLGooglePrivate ----------------------------------------------------------- | 115 // KURLGooglePrivate ----------------------------------------------------------- |
| 115 | 116 |
| 116 KURLGooglePrivate::KURLGooglePrivate() | 117 KURLGooglePrivate::KURLGooglePrivate() |
| 117 : m_isValid(false) | 118 : m_isValid(false) |
| 118 , m_protocolInHTTPFamily(false) | 119 , m_protocolInHTTPFamily(false) |
| 119 , m_utf8IsASCII(true) | 120 , m_utf8IsASCII(true) |
| 120 , m_stringIsValid(false) | 121 , m_stringIsValid(false) |
| 121 { | 122 { |
| 122 } | 123 } |
| 123 | 124 |
| 124 KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isVal
id) | 125 KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isVal
id) |
| 125 : m_isValid(isValid) | 126 : m_isValid(isValid) |
| 126 , m_protocolInHTTPFamily(false) | 127 , m_protocolInHTTPFamily(false) |
| 127 , m_parsed(parsed) | 128 , m_parsed(parsed) |
| 128 , m_utf8IsASCII(true) | 129 , m_utf8IsASCII(true) |
| 129 , m_stringIsValid(false) | 130 , m_stringIsValid(false) |
| 130 { | 131 { |
| 131 } | 132 } |
| 132 | 133 |
| 133 // Setters for the data. Using the ASCII version when you know the | 134 // Setters for the data. Using the ASCII version when you know the |
| 134 // data is ASCII will be slightly more efficient. The UTF-8 version | 135 // data is ASCII will be slightly more efficient. The UTF-8 version |
| 135 // will always be correct if the caller is unsure. | 136 // will always be correct if the caller is unsure. |
| 136 void KURLGooglePrivate::setUtf8(const char* data, int data_len) | 137 void KURLGooglePrivate::setUtf8(const char* data, int dataLength) |
| 137 { | 138 { |
| 138 // The m_utf8IsASCII must always be correct since the DeprecatedString | 139 // The m_utf8IsASCII must always be correct since the DeprecatedString |
| 139 // getter must create it with the proper constructor. This test can be | 140 // getter must create it with the proper constructor. This test can be |
| 140 // removed when DeprecatedString is gone, but it still might be a | 141 // removed when DeprecatedString is gone, but it still might be a |
| 141 // performance win. | 142 // performance win. |
| 142 m_utf8IsASCII = true; | 143 m_utf8IsASCII = true; |
| 143 for (int i = 0; i < data_len; i++) { | 144 for (int i = 0; i < dataLength; i++) { |
| 144 if (static_cast<unsigned char>(data[i]) >= 0x80) { | 145 if (static_cast<unsigned char>(data[i]) >= 0x80) { |
| 145 m_utf8IsASCII = false; | 146 m_utf8IsASCII = false; |
| 146 break; | 147 break; |
| 147 } | 148 } |
| 148 } | 149 } |
| 149 | 150 |
| 150 m_utf8 = CString(data, data_len); | 151 m_utf8 = CString(data, dataLength); |
| 151 m_stringIsValid = false; | 152 m_stringIsValid = false; |
| 152 initProtocolInHTTPFamily(); | 153 initProtocolInHTTPFamily(); |
| 153 } | 154 } |
| 154 | 155 |
| 155 void KURLGooglePrivate::setAscii(const char* data, int data_len) | 156 void KURLGooglePrivate::setAscii(const char* data, int dataLength) |
| 156 { | 157 { |
| 157 m_utf8 = CString(data, data_len); | 158 m_utf8 = CString(data, dataLength); |
| 158 m_utf8IsASCII = true; | 159 m_utf8IsASCII = true; |
| 159 m_stringIsValid = false; | 160 m_stringIsValid = false; |
| 160 initProtocolInHTTPFamily(); | 161 initProtocolInHTTPFamily(); |
| 161 } | 162 } |
| 162 | 163 |
| 163 void KURLGooglePrivate::init(const KURL& base, | 164 void KURLGooglePrivate::init(const KURL& base, |
| 164 const String& relative, | 165 const String& relative, |
| 165 const TextEncoding* queryEncoding) | 166 const TextEncoding* queryEncoding) |
| 166 { | 167 { |
| 167 init(base, relative.characters(), relative.length(), queryEncoding); | 168 init(base, relative.characters(), relative.length(), queryEncoding); |
| 168 } | 169 } |
| 169 | 170 |
| 170 // Note: code mostly duplicated below. | 171 // Note: code mostly duplicated below. |
| 171 void KURLGooglePrivate::init(const KURL& base, const char* rel, int rel_len, | 172 void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength, |
| 172 const TextEncoding* query_encoding) | 173 const TextEncoding* queryEncoding) |
| 173 { | 174 { |
| 174 // As a performance optimization, we do not use the charset converter if | 175 // As a performance optimization, we do not use the charset converter if |
| 175 // encoding is UTF-8 or other Unicode encodings. Note that this is | 176 // encoding is UTF-8 or other Unicode encodings. Note that this is |
| 176 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be | 177 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be |
| 177 // more efficient with no charset converter object because it | 178 // more efficient with no charset converter object because it |
| 178 // can do UTF-8 internally with no extra copies. | 179 // can do UTF-8 internally with no extra copies. |
| 179 | 180 |
| 180 // We feel free to make the charset converter object every time since it's | 181 // We feel free to make the charset converter object every time since it's |
| 181 // just a wrapper around a reference. | 182 // just a wrapper around a reference. |
| 182 KURLCharsetConverter charset_converter_object(query_encoding); | 183 KURLCharsetConverter charsetConverterObject(queryEncoding); |
| 183 KURLCharsetConverter* charset_converter = | 184 KURLCharsetConverter* charsetConverter = |
| 184 (!query_encoding || isUnicodeEncoding(query_encoding)) ? 0 : | 185 (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : |
| 185 &charset_converter_object; | 186 &charsetConverterObject; |
| 186 | 187 |
| 187 url_canon::RawCanonOutputT<char> output; | 188 url_canon::RawCanonOutputT<char> output; |
| 188 const CString& baseStr = base.m_url.utf8String(); | 189 const CString& baseStr = base.m_url.utf8String(); |
| 189 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), | 190 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), |
| 190 base.m_url.m_parsed, rel, rel_len, | 191 base.m_url.m_parsed, rel, relLength, |
| 191 charset_converter, | 192 charsetConverter, |
| 192 &output, &m_parsed); | 193 &output, &m_parsed); |
| 193 | 194 |
| 194 // See FIXME in KURLGooglePrivate in the header. If canonicalization has not | 195 // See FIXME in KURLGooglePrivate in the header. If canonicalization has not |
| 195 // changed the string, we can avoid an extra allocation by using assignment. | 196 // changed the string, we can avoid an extra allocation by using assignment. |
| 196 // | 197 // |
| 197 // When KURL encounters an error such that the URL is invalid and empty | 198 // When KURL encounters an error such that the URL is invalid and empty |
| 198 // (for example, resolving a relative URL on a non-hierarchical base), it | 199 // (for example, resolving a relative URL on a non-hierarchical base), it |
| 199 // will produce an isNull URL, and calling setUtf8 will produce an empty | 200 // will produce an isNull URL, and calling setUtf8 will produce an empty |
| 200 // non-null URL. This is unlikely to affect anything, but we preserve this | 201 // non-null URL. This is unlikely to affect anything, but we preserve this |
| 201 // just in case. | 202 // just in case. |
| 202 if (m_isValid || output.length()) { | 203 if (m_isValid || output.length()) { |
| 203 // Without ref, the whole url is guaranteed to be ASCII-only. | 204 // Without ref, the whole url is guaranteed to be ASCII-only. |
| 204 if (m_parsed.ref.is_nonempty()) | 205 if (m_parsed.ref.is_nonempty()) |
| 205 setUtf8(output.data(), output.length()); | 206 setUtf8(output.data(), output.length()); |
| 206 else | 207 else |
| 207 setAscii(output.data(), output.length()); | 208 setAscii(output.data(), output.length()); |
| 208 } else { | 209 } else { |
| 209 // WebCore expects resolved URLs to be empty rather than NULL. | 210 // WebCore expects resolved URLs to be empty rather than NULL. |
| 210 setUtf8("", 0); | 211 setUtf8("", 0); |
| 211 } | 212 } |
| 212 } | 213 } |
| 213 | 214 |
| 214 // Note: code mostly duplicated above. See FIXMEs and comments there. | 215 // Note: code mostly duplicated above. See FIXMEs and comments there. |
| 215 void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int rel_len, | 216 void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength, |
| 216 const TextEncoding* query_encoding) | 217 const TextEncoding* queryEncoding) |
| 217 { | 218 { |
| 218 KURLCharsetConverter charset_converter_object(query_encoding); | 219 KURLCharsetConverter charsetConverterObject(queryEncoding); |
| 219 KURLCharsetConverter* charset_converter = | 220 KURLCharsetConverter* charsetConverter = |
| 220 (!query_encoding || isUnicodeEncoding(query_encoding)) ? 0 : | 221 (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : |
| 221 &charset_converter_object; | 222 &charsetConverterObject; |
| 222 | 223 |
| 223 url_canon::RawCanonOutputT<char> output; | 224 url_canon::RawCanonOutputT<char> output; |
| 224 const CString& baseStr = base.m_url.utf8String(); | 225 const CString& baseStr = base.m_url.utf8String(); |
| 225 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), | 226 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), |
| 226 base.m_url.m_parsed, rel, rel_len, | 227 base.m_url.m_parsed, rel, relLength, |
| 227 charset_converter, | 228 charsetConverter, |
| 228 &output, &m_parsed); | 229 &output, &m_parsed); |
| 229 | 230 |
| 230 | 231 |
| 231 if (m_isValid || output.length()) { | 232 if (m_isValid || output.length()) { |
| 232 if (m_parsed.ref.is_nonempty()) | 233 if (m_parsed.ref.is_nonempty()) |
| 233 setUtf8(output.data(), output.length()); | 234 setUtf8(output.data(), output.length()); |
| 234 else | 235 else |
| 235 setAscii(output.data(), output.length()); | 236 setAscii(output.data(), output.length()); |
| 236 } else | 237 } else |
| 237 setUtf8("", 0); | 238 setUtf8("", 0); |
| 238 } | 239 } |
| 239 | 240 |
| 240 void KURLGooglePrivate::initProtocolInHTTPFamily() | 241 void KURLGooglePrivate::initProtocolInHTTPFamily() |
| 241 { | 242 { |
| 242 m_protocolInHTTPFamily = m_isValid | 243 if (!m_isValid) { |
| 243 && m_parsed.scheme.len >= 4 | 244 m_protocolInHTTPFamily = false; |
| 244 && toASCIILower(m_utf8.data()[0]) == 'h' | 245 return; |
| 245 && toASCIILower(m_utf8.data()[1]) == 't' | 246 } |
| 246 && toASCIILower(m_utf8.data()[2]) == 't' | 247 |
| 247 && toASCIILower(m_utf8.data()[3]) == 'p' | 248 const char* scheme = m_utf8.data() + m_parsed.scheme.begin; |
| 248 && (m_parsed.scheme.len == 4 | 249 if (m_parsed.scheme.len == 4) |
| 249 || (m_parsed.scheme.len == 5 && toASCIILower(m_utf8.data()[4]) == 's
')); | 250 m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 4, "http"
); |
| 251 else if (m_parsed.scheme.len == 5) |
| 252 m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 5, "https
"); |
| 253 else |
| 254 m_protocolInHTTPFamily = false; |
| 250 } | 255 } |
| 251 | 256 |
| 252 void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const | 257 void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const |
| 253 { | 258 { |
| 254 dest->m_isValid = m_isValid; | 259 dest->m_isValid = m_isValid; |
| 255 dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily; | 260 dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily; |
| 256 dest->m_parsed = m_parsed; | 261 dest->m_parsed = m_parsed; |
| 257 | 262 |
| 258 // Don't copy the 16-bit string since that will be regenerated as needed. | 263 // Don't copy the 16-bit string since that will be regenerated as needed. |
| 259 dest->m_utf8 = CString(m_utf8.data(), m_utf8.length()); | 264 dest->m_utf8 = CString(m_utf8.data(), m_utf8.length()); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 277 // begin will always match the actual value and len (in terms of | 282 // begin will always match the actual value and len (in terms of |
| 278 // byte) will be longer than what's needed by 'mid'. However, mid | 283 // byte) will be longer than what's needed by 'mid'. However, mid |
| 279 // truncates len to avoid going past the end of a string so that we can | 284 // truncates len to avoid going past the end of a string so that we can |
| 280 // get away without doing anything here. | 285 // get away without doing anything here. |
| 281 return string().substring(comp.begin, comp.len); | 286 return string().substring(comp.begin, comp.len); |
| 282 } | 287 } |
| 283 | 288 |
| 284 void KURLGooglePrivate::replaceComponents(const Replacements& replacements) | 289 void KURLGooglePrivate::replaceComponents(const Replacements& replacements) |
| 285 { | 290 { |
| 286 url_canon::RawCanonOutputT<char> output; | 291 url_canon::RawCanonOutputT<char> output; |
| 287 url_parse::Parsed new_parsed; | 292 url_parse::Parsed newParsed; |
| 288 | 293 |
| 289 m_isValid = url_util::ReplaceComponents(utf8String().data(), | 294 m_isValid = url_util::ReplaceComponents(utf8String().data(), |
| 290 utf8String().length(), m_parsed, replacements, NULL, &output, &new_parse
d); | 295 utf8String().length(), m_parsed, rep
lacements, 0, &output, &newParsed); |
| 291 | 296 |
| 292 m_parsed = new_parsed; | 297 m_parsed = newParsed; |
| 293 if (m_parsed.ref.is_nonempty()) | 298 if (m_parsed.ref.is_nonempty()) |
| 294 setUtf8(output.data(), output.length()); | 299 setUtf8(output.data(), output.length()); |
| 295 else | 300 else |
| 296 setAscii(output.data(), output.length()); | 301 setAscii(output.data(), output.length()); |
| 297 } | 302 } |
| 298 | 303 |
| 299 const String& KURLGooglePrivate::string() const | 304 const String& KURLGooglePrivate::string() const |
| 300 { | 305 { |
| 301 if (!m_stringIsValid) { | 306 if (!m_stringIsValid) { |
| 302 // Must special case the NULL case, since constructing the | 307 // Must special case the NULL case, since constructing the |
| (...skipping 13 matching lines...) Expand all Loading... |
| 316 // KURL ------------------------------------------------------------------------ | 321 // KURL ------------------------------------------------------------------------ |
| 317 | 322 |
| 318 // Creates with NULL-terminated string input representing an absolute URL. | 323 // Creates with NULL-terminated string input representing an absolute URL. |
| 319 // WebCore generally calls this only with hardcoded strings, so the input is | 324 // WebCore generally calls this only with hardcoded strings, so the input is |
| 320 // ASCII. We treat it as UTF-8 just in case. | 325 // ASCII. We treat it as UTF-8 just in case. |
| 321 KURL::KURL(const char *url) | 326 KURL::KURL(const char *url) |
| 322 { | 327 { |
| 323 // FIXME The Mac code checks for beginning with a slash and converting to a | 328 // FIXME The Mac code checks for beginning with a slash and converting to a |
| 324 // file: URL. We will want to add this as well once we can compile on a | 329 // file: URL. We will want to add this as well once we can compile on a |
| 325 // system like that. | 330 // system like that. |
| 326 m_url.init(KURL(), url, strlen(url), NULL); | 331 m_url.init(KURL(), url, strlen(url), 0); |
| 327 | 332 |
| 328 // The one-argument constructors should never generate a NULL string. | 333 // The one-argument constructors should never generate a NULL string. |
| 329 // This is a funny quirk of KURL.cpp (probably a bug) which we preserve. | 334 // This is a funny quirk of KURL.cpp (probably a bug) which we preserve. |
| 330 if (m_url.utf8String().isNull()) | 335 if (m_url.utf8String().isNull()) |
| 331 m_url.setAscii("", 0); | 336 m_url.setAscii("", 0); |
| 332 } | 337 } |
| 333 | 338 |
| 334 // Initializes with a string representing an absolute URL. No encoding | 339 // Initializes with a string representing an absolute URL. No encoding |
| 335 // information is specified. This generally happens when a KURL is converted | 340 // information is specified. This generally happens when a KURL is converted |
| 336 // to a string and then converted back. In this case, the URL is already | 341 // to a string and then converted back. In this case, the URL is already |
| 337 // canonical and in proper escaped form so needs no encoding. We treat it as | 342 // canonical and in proper escaped form so needs no encoding. We treat it as |
| 338 // UTF-8 just in case. | 343 // UTF-8 just in case. |
| 339 KURL::KURL(const String& url) | 344 KURL::KURL(const String& url) |
| 340 { | 345 { |
| 341 if (!url.isNull()) | 346 if (!url.isNull()) |
| 342 m_url.init(KURL(), url, NULL); | 347 m_url.init(KURL(), url, 0); |
| 343 else { | 348 else { |
| 344 // WebCore expects us to preserve the nullness of strings when this | 349 // WebCore expects us to preserve the nullness of strings when this |
| 345 // constructor is used. In all other cases, it expects a non-null | 350 // constructor is used. In all other cases, it expects a non-null |
| 346 // empty string, which is what init() will create. | 351 // empty string, which is what init() will create. |
| 347 m_url.m_isValid = false; | 352 m_url.m_isValid = false; |
| 348 m_url.m_protocolInHTTPFamily = false; | 353 m_url.m_protocolInHTTPFamily = false; |
| 349 } | 354 } |
| 350 } | 355 } |
| 351 | 356 |
| 352 // Constructs a new URL given a base URL and a possibly relative input URL. | 357 // Constructs a new URL given a base URL and a possibly relative input URL. |
| 353 // This assumes UTF-8 encoding. | 358 // This assumes UTF-8 encoding. |
| 354 KURL::KURL(const KURL& base, const String& relative) | 359 KURL::KURL(const KURL& base, const String& relative) |
| 355 { | 360 { |
| 356 m_url.init(base, relative, NULL); | 361 m_url.init(base, relative, 0); |
| 357 } | 362 } |
| 358 | 363 |
| 359 // Constructs a new URL given a base URL and a possibly relative input URL. | 364 // Constructs a new URL given a base URL and a possibly relative input URL. |
| 360 // Any query portion of the relative URL will be encoded in the given encoding. | 365 // Any query portion of the relative URL will be encoded in the given encoding. |
| 361 KURL::KURL(const KURL& base, | 366 KURL::KURL(const KURL& base, |
| 362 const String& relative, | 367 const String& relative, |
| 363 const TextEncoding& encoding) | 368 const TextEncoding& encoding) |
| 364 { | 369 { |
| 365 m_url.init(base, relative, &encoding.encodingForFormSubmission()); | 370 m_url.init(base, relative, &encoding.encodingForFormSubmission()); |
| 366 } | 371 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 380 #if PLATFORM(CF) | 385 #if PLATFORM(CF) |
| 381 KURL::KURL(CFURLRef) | 386 KURL::KURL(CFURLRef) |
| 382 { | 387 { |
| 383 notImplemented(); | 388 notImplemented(); |
| 384 invalidate(); | 389 invalidate(); |
| 385 } | 390 } |
| 386 | 391 |
| 387 CFURLRef KURL::createCFURL() const | 392 CFURLRef KURL::createCFURL() const |
| 388 { | 393 { |
| 389 notImplemented(); | 394 notImplemented(); |
| 390 return NULL; | 395 return 0; |
| 391 } | 396 } |
| 392 #endif | 397 #endif |
| 393 | 398 |
| 394 KURL KURL::copy() const | 399 KURL KURL::copy() const |
| 395 { | 400 { |
| 396 KURL result = *this; | 401 KURL result = *this; |
| 397 m_url.copyTo(&result.m_url); | 402 m_url.copyTo(&result.m_url); |
| 398 return result; | 403 return result; |
| 399 } | 404 } |
| 400 | 405 |
| 401 bool KURL::isNull() const | 406 bool KURL::isNull() const |
| 402 { | 407 { |
| 403 return m_url.utf8String().isNull(); | 408 return m_url.utf8String().isNull(); |
| 404 } | 409 } |
| 405 | 410 |
| 406 bool KURL::isEmpty() const | 411 bool KURL::isEmpty() const |
| 407 { | 412 { |
| 408 return m_url.utf8String().length() == 0; | 413 return !m_url.utf8String().length(); |
| 409 } | 414 } |
| 410 | 415 |
| 411 bool KURL::isValid() const | 416 bool KURL::isValid() const |
| 412 { | 417 { |
| 413 return m_url.m_isValid; | 418 return m_url.m_isValid; |
| 414 } | 419 } |
| 415 | 420 |
| 416 bool KURL::protocolInHTTPFamily() const | 421 bool KURL::protocolInHTTPFamily() const |
| 417 { | 422 { |
| 418 return m_url.m_protocolInHTTPFamily; | 423 return m_url.m_protocolInHTTPFamily; |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 509 // FIXME determine if KURL.cpp agrees about an empty ref | 514 // FIXME determine if KURL.cpp agrees about an empty ref |
| 510 return m_url.m_parsed.ref.len >= 0; | 515 return m_url.m_parsed.ref.len >= 0; |
| 511 } | 516 } |
| 512 | 517 |
| 513 String KURL::query() const | 518 String KURL::query() const |
| 514 { | 519 { |
| 515 if (m_url.m_parsed.query.len >= 0) { | 520 if (m_url.m_parsed.query.len >= 0) { |
| 516 // KURL's query() includes the question mark, even though the reference | 521 // KURL's query() includes the question mark, even though the reference |
| 517 // doesn't. Move the query component backwards one to account for it | 522 // doesn't. Move the query component backwards one to account for it |
| 518 // (our library doesn't count the question mark). | 523 // (our library doesn't count the question mark). |
| 519 url_parse::Component query_comp = m_url.m_parsed.query; | 524 url_parse::Component queryComp = m_url.m_parsed.query; |
| 520 query_comp.begin--; | 525 queryComp.begin--; |
| 521 query_comp.len++; | 526 queryComp.len++; |
| 522 return m_url.componentString(query_comp); | 527 return m_url.componentString(queryComp); |
| 523 } | 528 } |
| 524 | 529 |
| 525 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns | 530 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns |
| 526 // an empty string when the query is empty rather than a null (not sure | 531 // an empty string when the query is empty rather than a null (not sure |
| 527 // which is right). | 532 // which is right). |
| 528 return String("", 0); | 533 return String("", 0); |
| 529 } | 534 } |
| 530 | 535 |
| 531 String KURL::path() const | 536 String KURL::path() const |
| 532 { | 537 { |
| (...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 728 // custom code for now. Using their version will also fix the bug that | 733 // custom code for now. Using their version will also fix the bug that |
| 729 // we ignore the encoding. | 734 // we ignore the encoding. |
| 730 // | 735 // |
| 731 // FIXME b/1350291: This does not get called very often. We just convert | 736 // FIXME b/1350291: This does not get called very often. We just convert |
| 732 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of | 737 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of |
| 733 // sucks, and we don't use the encoding properly, which will make some | 738 // sucks, and we don't use the encoding properly, which will make some |
| 734 // obscure anchor navigations fail. | 739 // obscure anchor navigations fail. |
| 735 CString cstr = str.utf8(); | 740 CString cstr = str.utf8(); |
| 736 | 741 |
| 737 const char* input = cstr.data(); | 742 const char* input = cstr.data(); |
| 738 int input_length = cstr.length(); | 743 int inputLength = cstr.length(); |
| 739 url_canon::RawCanonOutputT<char> unescaped; | 744 url_canon::RawCanonOutputT<char> unescaped; |
| 740 for (int i = 0; i < input_length; i++) { | 745 for (int i = 0; i < inputLength; i++) { |
| 741 if (input[i] == '%') { | 746 if (input[i] == '%') { |
| 742 unsigned char ch; | 747 unsigned char ch; |
| 743 if (url_canon::DecodeEscaped(input, &i, input_length, &ch)) { | 748 if (url_canon::DecodeEscaped(input, &i, inputLength, &ch)) { |
| 744 if (ch == 0) { | 749 if (!ch) { |
| 745 // Never unescape NULLs. | 750 // Never unescape NULLs. |
| 746 unescaped.push_back('%'); | 751 unescaped.push_back('%'); |
| 747 unescaped.push_back('0'); | 752 unescaped.push_back('0'); |
| 748 unescaped.push_back('0'); | 753 unescaped.push_back('0'); |
| 749 } else | 754 } else |
| 750 unescaped.push_back(ch); | 755 unescaped.push_back(ch); |
| 751 } else { | 756 } else { |
| 752 // Invalid escape sequence, copy the percent literal. | 757 // Invalid escape sequence, copy the percent literal. |
| 753 unescaped.push_back('%'); | 758 unescaped.push_back('%'); |
| 754 } | 759 } |
| 755 } else { | 760 } else { |
| 756 // Regular non-escaped 8-bit character. | 761 // Regular non-escaped 8-bit character. |
| 757 unescaped.push_back(input[i]); | 762 unescaped.push_back(input[i]); |
| 758 } | 763 } |
| 759 } | 764 } |
| 760 | 765 |
| 761 // Convert that 8-bit to UTF-16. It's not clear IE does this at all to | 766 // Convert that 8-bit to UTF-16. It's not clear IE does this at all to |
| 762 // JavaScript URLs, but Firefox and Safari do. | 767 // JavaScript URLs, but Firefox and Safari do. |
| 763 url_canon::RawCanonOutputT<url_parse::UTF16Char> utf16; | 768 url_canon::RawCanonOutputT<url_parse::UTF16Char> utf16; |
| 764 for (int i = 0; i < unescaped.length(); i++) { | 769 for (int i = 0; i < unescaped.length(); i++) { |
| 765 unsigned char uch = static_cast<unsigned char>(unescaped.at(i)); | 770 unsigned char uch = static_cast<unsigned char>(unescaped.at(i)); |
| 766 if (uch < 0x80) { | 771 if (uch < 0x80) { |
| 767 // Non-UTF-8, just append directly | 772 // Non-UTF-8, just append directly |
| 768 utf16.push_back(uch); | 773 utf16.push_back(uch); |
| 769 } else { | 774 } else { |
| 770 // next_ch will point to the last character of the decoded | 775 // next_ch will point to the last character of the decoded |
| 771 // character. | 776 // character. |
| 772 int next_ch = i; | 777 int nextCharacter = i; |
| 773 unsigned code_point; | 778 unsigned codePoint; |
| 774 if (url_canon::ReadUTFChar(unescaped.data(), &next_ch, | 779 if (url_canon::ReadUTFChar(unescaped.data(), &nextCharacter, |
| 775 unescaped.length(), &code_point)) { | 780 unescaped.length(), &codePoint)) { |
| 776 // Valid UTF-8 character, convert to UTF-16. | 781 // Valid UTF-8 character, convert to UTF-16. |
| 777 url_canon::AppendUTF16Value(code_point, &utf16); | 782 url_canon::AppendUTF16Value(codePoint, &utf16); |
| 778 i = next_ch; | 783 i = nextCharacter; |
| 779 } else { | 784 } else { |
| 780 // KURL.cpp strips any sequences that are not valid UTF-8. This | 785 // KURL.cpp strips any sequences that are not valid UTF-8. This |
| 781 // sounds scary. Instead, we just keep those invalid code | 786 // sounds scary. Instead, we just keep those invalid code |
| 782 // points and promote to UTF-16. We copy all characters from | 787 // points and promote to UTF-16. We copy all characters from |
| 783 // the current position to the end of the identified sequence. | 788 // the current position to the end of the identified sequence. |
| 784 while (i < next_ch) { | 789 while (i < nextCharacter) { |
| 785 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)))
; | 790 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)))
; |
| 786 i++; | 791 i++; |
| 787 } | 792 } |
| 788 utf16.push_back(static_cast<unsigned char>(unescaped.at(i))); | 793 utf16.push_back(static_cast<unsigned char>(unescaped.at(i))); |
| 789 } | 794 } |
| 790 } | 795 } |
| 791 } | 796 } |
| 792 | 797 |
| 793 return String(reinterpret_cast<UChar*>(utf16.data()), utf16.length()); | 798 return String(reinterpret_cast<UChar*>(utf16.data()), utf16.length()); |
| 794 } | 799 } |
| 795 | 800 |
| 796 bool KURL::protocolIs(const char* protocol) const | 801 bool KURL::protocolIs(const char* protocol) const |
| 797 { | 802 { |
| 798 assertProtocolIsGood(protocol); | 803 assertProtocolIsGood(protocol); |
| 799 if (m_url.m_parsed.scheme.len <= 0) | 804 if (m_url.m_parsed.scheme.len <= 0) |
| 800 return protocol == NULL; | 805 return !protocol; |
| 801 return lowerCaseEqualsASCII( | 806 return lowerCaseEqualsASCII( |
| 802 m_url.utf8String().data() + m_url.m_parsed.scheme.begin, | 807 m_url.utf8String().data() + m_url.m_parsed.scheme.begin, |
| 803 m_url.utf8String().data() + m_url.m_parsed.scheme.end(), | 808 m_url.utf8String().data() + m_url.m_parsed.scheme.end(), |
| 804 protocol); | 809 protocol); |
| 805 } | 810 } |
| 806 | 811 |
| 807 bool KURL::isLocalFile() const | 812 bool KURL::isLocalFile() const |
| 808 { | 813 { |
| 809 return protocolIs("file"); | 814 return protocolIs("file"); |
| 810 } | 815 } |
| 811 | 816 |
| 812 // This is called to escape a URL string. It is only used externally when | 817 // This is called to escape a URL string. It is only used externally when |
| 813 // constructing mailto: links to set the query section. Since our query setter | 818 // constructing mailto: links to set the query section. Since our query setter |
| 814 // will automatically do the correct escaping, this function does not have to | 819 // will automatically do the correct escaping, this function does not have to |
| 815 // do any work. | 820 // do any work. |
| 816 // | 821 // |
| 817 // There is a possibility that a future caller may use this function in other | 822 // There is a possibility that a future caller may use this function in other |
| 818 // ways, and may expect to get a valid URL string. The dangerous thing we want | 823 // ways, and may expect to get a valid URL string. The dangerous thing we want |
| 819 // to protect against here is accidentally getting NULLs in a string that is | 824 // to protect against here is accidentally getting NULLs in a string that is |
| 820 // not supposed to have NULLs. Therefore, we escape NULLs here to prevent this. | 825 // not supposed to have NULLs. Therefore, we escape NULLs here to prevent this. |
| 821 String encodeWithURLEscapeSequences(const String& notEncodedString) | 826 String encodeWithURLEscapeSequences(const String& notEncodedString) |
| 822 { | 827 { |
| 823 CString utf8 = UTF8Encoding().encode( | 828 CString utf8 = UTF8Encoding().encode( |
| 824 reinterpret_cast<const UChar*>(notEncodedString.characters()), | 829 reinterpret_cast<const UChar*>(notEncodedString.characters()), |
| 825 notEncodedString.length(), | 830 notEncodedString.length(), |
| 826 URLEncodedEntitiesForUnencodables); | 831 URLEncodedEntitiesForUnencodables); |
| 827 const char* input = utf8.data(); | 832 const char* input = utf8.data(); |
| 828 int input_len = utf8.length(); | 833 int inputLength = utf8.length(); |
| 829 | 834 |
| 830 Vector<char, 2048> buffer; | 835 Vector<char, 2048> buffer; |
| 831 for (int i = 0; i < input_len; i++) { | 836 for (int i = 0; i < inputLength; i++) { |
| 832 if (input[i] == 0) | 837 if (!input[i]) |
| 833 buffer.append("%00", 3); | 838 buffer.append("%00", 3); |
| 834 else | 839 else |
| 835 buffer.append(input[i]); | 840 buffer.append(input[i]); |
| 836 } | 841 } |
| 837 return String(buffer.data(), buffer.size()); | 842 return String(buffer.data(), buffer.size()); |
| 838 } | 843 } |
| 839 | 844 |
| 840 bool KURL::isHierarchical() const | 845 bool KURL::isHierarchical() const |
| 841 { | 846 { |
| 842 if (!m_url.m_parsed.scheme.is_nonempty()) | 847 if (!m_url.m_parsed.scheme.is_nonempty()) |
| (...skipping 18 matching lines...) Expand all Loading... |
| 861 m_url.m_isValid = false; | 866 m_url.m_isValid = false; |
| 862 m_url.m_protocolInHTTPFamily = false; | 867 m_url.m_protocolInHTTPFamily = false; |
| 863 } | 868 } |
| 864 | 869 |
| 865 // Equal up to reference fragments, if any. | 870 // Equal up to reference fragments, if any. |
| 866 bool equalIgnoringRef(const KURL& a, const KURL& b) | 871 bool equalIgnoringRef(const KURL& a, const KURL& b) |
| 867 { | 872 { |
| 868 // Compute the length of each URL without its ref. Note that the reference | 873 // Compute the length of each URL without its ref. Note that the reference |
| 869 // begin (if it exists) points to the character *after* the '#', so we need | 874 // begin (if it exists) points to the character *after* the '#', so we need |
| 870 // to subtract one. | 875 // to subtract one. |
| 871 int a_len = a.m_url.utf8String().length(); | 876 int aLength = a.m_url.utf8String().length(); |
| 872 if (a.m_url.m_parsed.ref.len >= 0) | 877 if (a.m_url.m_parsed.ref.len >= 0) |
| 873 a_len = a.m_url.m_parsed.ref.begin - 1; | 878 aLength = a.m_url.m_parsed.ref.begin - 1; |
| 874 | 879 |
| 875 int b_len = b.m_url.utf8String().length(); | 880 int bLength = b.m_url.utf8String().length(); |
| 876 if (b.m_url.m_parsed.ref.len >= 0) | 881 if (b.m_url.m_parsed.ref.len >= 0) |
| 877 b_len = b.m_url.m_parsed.ref.begin - 1; | 882 bLength = b.m_url.m_parsed.ref.begin - 1; |
| 878 | 883 |
| 879 return a_len == b_len | 884 return aLength == bLength |
| 880 && strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), a_len) == 0; | 885 && !strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), aLength); |
| 881 } | 886 } |
| 882 | 887 |
| 883 unsigned KURL::hostStart() const | 888 unsigned KURL::hostStart() const |
| 884 { | 889 { |
| 885 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false); | 890 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false); |
| 886 } | 891 } |
| 887 | 892 |
| 888 unsigned KURL::hostEnd() const | 893 unsigned KURL::hostEnd() const |
| 889 { | 894 { |
| 890 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true); | 895 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 931 } | 936 } |
| 932 | 937 |
| 933 inline bool KURL::protocolIs(const String& string, const char* protocol) | 938 inline bool KURL::protocolIs(const String& string, const char* protocol) |
| 934 { | 939 { |
| 935 return WebCore::protocolIs(string, protocol); | 940 return WebCore::protocolIs(string, protocol); |
| 936 } | 941 } |
| 937 | 942 |
| 938 } // namespace WebCore | 943 } // namespace WebCore |
| 939 | 944 |
| 940 #endif // USE(GOOGLEURL) | 945 #endif // USE(GOOGLEURL) |
| OLD | NEW |