third_party/WebKit/WebCore/platform/KURLGoogle.cpp - Issue 21184: WebKit merge 40722:40785 (part 1)

Side by Side Diff: third_party/WebKit/WebCore/platform/KURLGoogle.cpp

Issue 21184: WebKit merge 40722:40785 (part 1) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 11 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2008, 2009, Google Inc. All rights reserved.	2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions are	5 * modification, are permitted provided that the following conditions are

6 * met:	6 * met:

7 *	7 *

8 * * Redistributions of source code must retain the above copyright	8 * * Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * * Redistributions in binary form must reproduce the above	10 * * Redistributions in binary form must reproduce the above

11 * copyright notice, this list of conditions and the following disclaimer	11 * copyright notice, this list of conditions and the following disclaimer

12 * in the documentation and/or other materials provided with the	12 * in the documentation and/or other materials provided with the

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
50 // canonicalizer.	50 // canonicalizer.

51 class KURLCharsetConverter : public url_canon::CharsetConverter {	51 class KURLCharsetConverter : public url_canon::CharsetConverter {

52 public:	52 public:

53 // The encoding parameter may be NULL, but in this case the object must not	53 // The encoding parameter may be NULL, but in this case the object must not

54 // be called.	54 // be called.

55 KURLCharsetConverter(const TextEncoding* encoding)	55 KURLCharsetConverter(const TextEncoding* encoding)

56 : m_encoding(encoding)	56 : m_encoding(encoding)

57 {	57 {

58 }	58 }

59	59

60 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int input_len,	60 virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLength,

61 url_canon::CanonOutput* output)	61 url_canon::CanonOutput* output)

62 {	62 {

63 CString encoded = m_encoding->encode(input, input_len, URLEncodedEntitiesForUnencodables);	63 CString encoded = m_encoding->encode(input, inputLength, URLEncodedEntitiesForUnencodables);

64 output->Append(encoded.data(), static_cast<int>(encoded.length()));	64 output->Append(encoded.data(), static_cast<int>(encoded.length()));

65 }	65 }

66	66

67 private:	67 private:

68 const TextEncoding* m_encoding;	68 const TextEncoding* m_encoding;

69 };	69 };

70	70

71 // Note that this function must be named differently than the one in KURL.cpp	71 // Note that this function must be named differently than the one in KURL.cpp

72 // since our unit tests evilly include both files, and their local definition	72 // since our unit tests evilly include both files, and their local definition

73 // will be ambiguous.	73 // will be ambiguous.

(...skipping 19 matching lines...) Expand all Loading...
93 &zero;	93 &zero;

94 }	94 }

95	95

96 static inline bool isUnicodeEncoding(const TextEncoding* encoding)	96 static inline bool isUnicodeEncoding(const TextEncoding* encoding)

97 {	97 {

98 return encoding->encodingForFormSubmission() == UTF8Encoding();	98 return encoding->encodingForFormSubmission() == UTF8Encoding();

99 }	99 }

100	100

101 static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char* str)	101 static bool lowerCaseEqualsASCII(const char* begin, const char* end, const char* str)

102 {	102 {

103 while (begin != end) {	103 while (begin != end && *str) {

104 if (!*str)

105 return false;

106 ASSERT(isASCIILower(*str));	104 ASSERT(isASCIILower(*str));

107 if (toASCIILower(*begin++) != *str++)	105 if (toASCIILower(*begin++) != *str++)

108 return false;	106 return false;

109 }	107 }

110 return !*str;	108

	109 // Both strings are equal (ignoring case) if and only if all of the characters were equal,

	110 // and the end of both has been reached.

	111 return begin == end && !*str;

111 }	112 }

112	113

113	114

114 // KURLGooglePrivate -----------------------------------------------------------	115 // KURLGooglePrivate -----------------------------------------------------------

115	116

116 KURLGooglePrivate::KURLGooglePrivate()	117 KURLGooglePrivate::KURLGooglePrivate()

117 : m_isValid(false)	118 : m_isValid(false)

118 , m_protocolInHTTPFamily(false)	119 , m_protocolInHTTPFamily(false)

119 , m_utf8IsASCII(true)	120 , m_utf8IsASCII(true)

120 , m_stringIsValid(false)	121 , m_stringIsValid(false)

121 {	122 {

122 }	123 }

123	124

124 KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isValid)	125 KURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isValid)

125 : m_isValid(isValid)	126 : m_isValid(isValid)

126 , m_protocolInHTTPFamily(false)	127 , m_protocolInHTTPFamily(false)

127 , m_parsed(parsed)	128 , m_parsed(parsed)

128 , m_utf8IsASCII(true)	129 , m_utf8IsASCII(true)

129 , m_stringIsValid(false)	130 , m_stringIsValid(false)

130 {	131 {

131 }	132 }

132	133

133 // Setters for the data. Using the ASCII version when you know the	134 // Setters for the data. Using the ASCII version when you know the

134 // data is ASCII will be slightly more efficient. The UTF-8 version	135 // data is ASCII will be slightly more efficient. The UTF-8 version

135 // will always be correct if the caller is unsure.	136 // will always be correct if the caller is unsure.

136 void KURLGooglePrivate::setUtf8(const char* data, int data_len)	137 void KURLGooglePrivate::setUtf8(const char* data, int dataLength)

137 {	138 {

138 // The m_utf8IsASCII must always be correct since the DeprecatedString	139 // The m_utf8IsASCII must always be correct since the DeprecatedString

139 // getter must create it with the proper constructor. This test can be	140 // getter must create it with the proper constructor. This test can be

140 // removed when DeprecatedString is gone, but it still might be a	141 // removed when DeprecatedString is gone, but it still might be a

141 // performance win.	142 // performance win.

142 m_utf8IsASCII = true;	143 m_utf8IsASCII = true;

143 for (int i = 0; i < data_len; i++) {	144 for (int i = 0; i < dataLength; i++) {

144 if (static_cast<unsigned char>(data[i]) >= 0x80) {	145 if (static_cast<unsigned char>(data[i]) >= 0x80) {

145 m_utf8IsASCII = false;	146 m_utf8IsASCII = false;

146 break;	147 break;

147 }	148 }

148 }	149 }

149	150

150 m_utf8 = CString(data, data_len);	151 m_utf8 = CString(data, dataLength);

151 m_stringIsValid = false;	152 m_stringIsValid = false;

152 initProtocolInHTTPFamily();	153 initProtocolInHTTPFamily();

153 }	154 }

154	155

155 void KURLGooglePrivate::setAscii(const char* data, int data_len)	156 void KURLGooglePrivate::setAscii(const char* data, int dataLength)

156 {	157 {

157 m_utf8 = CString(data, data_len);	158 m_utf8 = CString(data, dataLength);

158 m_utf8IsASCII = true;	159 m_utf8IsASCII = true;

159 m_stringIsValid = false;	160 m_stringIsValid = false;

160 initProtocolInHTTPFamily();	161 initProtocolInHTTPFamily();

161 }	162 }

162	163

163 void KURLGooglePrivate::init(const KURL& base,	164 void KURLGooglePrivate::init(const KURL& base,

164 const String& relative,	165 const String& relative,

165 const TextEncoding* queryEncoding)	166 const TextEncoding* queryEncoding)

166 {	167 {

167 init(base, relative.characters(), relative.length(), queryEncoding);	168 init(base, relative.characters(), relative.length(), queryEncoding);

168 }	169 }

169	170

170 // Note: code mostly duplicated below.	171 // Note: code mostly duplicated below.

171 void KURLGooglePrivate::init(const KURL& base, const char* rel, int rel_len,	172 void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength,

172 const TextEncoding* query_encoding)	173 const TextEncoding* queryEncoding)

173 {	174 {

174 // As a performance optimization, we do not use the charset converter if	175 // As a performance optimization, we do not use the charset converter if

175 // encoding is UTF-8 or other Unicode encodings. Note that this is	176 // encoding is UTF-8 or other Unicode encodings. Note that this is

176 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be	177 // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be

177 // more efficient with no charset converter object because it	178 // more efficient with no charset converter object because it

178 // can do UTF-8 internally with no extra copies.	179 // can do UTF-8 internally with no extra copies.

179	180

180 // We feel free to make the charset converter object every time since it's	181 // We feel free to make the charset converter object every time since it's

181 // just a wrapper around a reference.	182 // just a wrapper around a reference.

182 KURLCharsetConverter charset_converter_object(query_encoding);	183 KURLCharsetConverter charsetConverterObject(queryEncoding);

183 KURLCharsetConverter* charset_converter =	184 KURLCharsetConverter* charsetConverter =

184 (!query_encoding || isUnicodeEncoding(query_encoding)) ? 0 :	185 (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :

185 &charset_converter_object;	186 &charsetConverterObject;

186	187

187 url_canon::RawCanonOutputT<char> output;	188 url_canon::RawCanonOutputT<char> output;

188 const CString& baseStr = base.m_url.utf8String();	189 const CString& baseStr = base.m_url.utf8String();

189 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),	190 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),

190 base.m_url.m_parsed, rel, rel_len,	191 base.m_url.m_parsed, rel, relLength,

191 charset_converter,	192 charsetConverter,

192 &output, &m_parsed);	193 &output, &m_parsed);

193	194

194 // See FIXME in KURLGooglePrivate in the header. If canonicalization has not	195 // See FIXME in KURLGooglePrivate in the header. If canonicalization has not

195 // changed the string, we can avoid an extra allocation by using assignment.	196 // changed the string, we can avoid an extra allocation by using assignment.

196 //	197 //

197 // When KURL encounters an error such that the URL is invalid and empty	198 // When KURL encounters an error such that the URL is invalid and empty

198 // (for example, resolving a relative URL on a non-hierarchical base), it	199 // (for example, resolving a relative URL on a non-hierarchical base), it

199 // will produce an isNull URL, and calling setUtf8 will produce an empty	200 // will produce an isNull URL, and calling setUtf8 will produce an empty

200 // non-null URL. This is unlikely to affect anything, but we preserve this	201 // non-null URL. This is unlikely to affect anything, but we preserve this

201 // just in case.	202 // just in case.

202 if (m_isValid || output.length()) {	203 if (m_isValid || output.length()) {

203 // Without ref, the whole url is guaranteed to be ASCII-only.	204 // Without ref, the whole url is guaranteed to be ASCII-only.

204 if (m_parsed.ref.is_nonempty())	205 if (m_parsed.ref.is_nonempty())

205 setUtf8(output.data(), output.length());	206 setUtf8(output.data(), output.length());

206 else	207 else

207 setAscii(output.data(), output.length());	208 setAscii(output.data(), output.length());

208 } else {	209 } else {

209 // WebCore expects resolved URLs to be empty rather than NULL.	210 // WebCore expects resolved URLs to be empty rather than NULL.

210 setUtf8("", 0);	211 setUtf8("", 0);

211 }	212 }

212 }	213 }

213	214

214 // Note: code mostly duplicated above. See FIXMEs and comments there.	215 // Note: code mostly duplicated above. See FIXMEs and comments there.

215 void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int rel_len,	216 void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength,

216 const TextEncoding* query_encoding)	217 const TextEncoding* queryEncoding)

217 {	218 {

218 KURLCharsetConverter charset_converter_object(query_encoding);	219 KURLCharsetConverter charsetConverterObject(queryEncoding);

219 KURLCharsetConverter* charset_converter =	220 KURLCharsetConverter* charsetConverter =

220 (!query_encoding || isUnicodeEncoding(query_encoding)) ? 0 :	221 (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :

221 &charset_converter_object;	222 &charsetConverterObject;

222	223

223 url_canon::RawCanonOutputT<char> output;	224 url_canon::RawCanonOutputT<char> output;

224 const CString& baseStr = base.m_url.utf8String();	225 const CString& baseStr = base.m_url.utf8String();

225 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),	226 m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),

226 base.m_url.m_parsed, rel, rel_len,	227 base.m_url.m_parsed, rel, relLength,

227 charset_converter,	228 charsetConverter,

228 &output, &m_parsed);	229 &output, &m_parsed);

229	230

230	231

231 if (m_isValid || output.length()) {	232 if (m_isValid || output.length()) {

232 if (m_parsed.ref.is_nonempty())	233 if (m_parsed.ref.is_nonempty())

233 setUtf8(output.data(), output.length());	234 setUtf8(output.data(), output.length());

234 else	235 else

235 setAscii(output.data(), output.length());	236 setAscii(output.data(), output.length());

236 } else	237 } else

237 setUtf8("", 0);	238 setUtf8("", 0);

238 }	239 }

239	240

240 void KURLGooglePrivate::initProtocolInHTTPFamily()	241 void KURLGooglePrivate::initProtocolInHTTPFamily()

241 {	242 {

242 m_protocolInHTTPFamily = m_isValid	243 if (!m_isValid) {

243 && m_parsed.scheme.len >= 4	244 m_protocolInHTTPFamily = false;

244 && toASCIILower(m_utf8.data()[0]) == 'h'	245 return;

245 && toASCIILower(m_utf8.data()[1]) == 't'	246 }

246 && toASCIILower(m_utf8.data()[2]) == 't'	247

247 && toASCIILower(m_utf8.data()[3]) == 'p'	248 const char* scheme = m_utf8.data() + m_parsed.scheme.begin;

248 && (m_parsed.scheme.len == 4	249 if (m_parsed.scheme.len == 4)

249 || (m_parsed.scheme.len == 5 && toASCIILower(m_utf8.data()[4]) == 's'));	250 m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 4, "http");

	251 else if (m_parsed.scheme.len == 5)

	252 m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 5, "https");

	253 else

	254 m_protocolInHTTPFamily = false;

250 }	255 }

251	256

252 void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const	257 void KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const

253 {	258 {

254 dest->m_isValid = m_isValid;	259 dest->m_isValid = m_isValid;

255 dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily;	260 dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily;

256 dest->m_parsed = m_parsed;	261 dest->m_parsed = m_parsed;

257	262

258 // Don't copy the 16-bit string since that will be regenerated as needed.	263 // Don't copy the 16-bit string since that will be regenerated as needed.

259 dest->m_utf8 = CString(m_utf8.data(), m_utf8.length());	264 dest->m_utf8 = CString(m_utf8.data(), m_utf8.length());

(...skipping 17 matching lines...) Expand all Loading...
277 // begin will always match the actual value and len (in terms of	282 // begin will always match the actual value and len (in terms of

278 // byte) will be longer than what's needed by 'mid'. However, mid	283 // byte) will be longer than what's needed by 'mid'. However, mid

279 // truncates len to avoid going past the end of a string so that we can	284 // truncates len to avoid going past the end of a string so that we can

280 // get away without doing anything here.	285 // get away without doing anything here.

281 return string().substring(comp.begin, comp.len);	286 return string().substring(comp.begin, comp.len);

282 }	287 }

283	288

284 void KURLGooglePrivate::replaceComponents(const Replacements& replacements)	289 void KURLGooglePrivate::replaceComponents(const Replacements& replacements)

285 {	290 {

286 url_canon::RawCanonOutputT<char> output;	291 url_canon::RawCanonOutputT<char> output;

287 url_parse::Parsed new_parsed;	292 url_parse::Parsed newParsed;

288	293

289 m_isValid = url_util::ReplaceComponents(utf8String().data(),	294 m_isValid = url_util::ReplaceComponents(utf8String().data(),

290 utf8String().length(), m_parsed, replacements, NULL, &output, &new_parsed);	295 utf8String().length(), m_parsed, replacements, 0, &output, &newParsed);

291	296

292 m_parsed = new_parsed;	297 m_parsed = newParsed;

293 if (m_parsed.ref.is_nonempty())	298 if (m_parsed.ref.is_nonempty())

294 setUtf8(output.data(), output.length());	299 setUtf8(output.data(), output.length());

295 else	300 else

296 setAscii(output.data(), output.length());	301 setAscii(output.data(), output.length());

297 }	302 }

298	303

299 const String& KURLGooglePrivate::string() const	304 const String& KURLGooglePrivate::string() const

300 {	305 {

301 if (!m_stringIsValid) {	306 if (!m_stringIsValid) {

302 // Must special case the NULL case, since constructing the	307 // Must special case the NULL case, since constructing the

(...skipping 13 matching lines...) Expand all Loading...
316 // KURL ------------------------------------------------------------------------	321 // KURL ------------------------------------------------------------------------

317	322

318 // Creates with NULL-terminated string input representing an absolute URL.	323 // Creates with NULL-terminated string input representing an absolute URL.

319 // WebCore generally calls this only with hardcoded strings, so the input is	324 // WebCore generally calls this only with hardcoded strings, so the input is

320 // ASCII. We treat it as UTF-8 just in case.	325 // ASCII. We treat it as UTF-8 just in case.

321 KURL::KURL(const char *url)	326 KURL::KURL(const char *url)

322 {	327 {

323 // FIXME The Mac code checks for beginning with a slash and converting to a	328 // FIXME The Mac code checks for beginning with a slash and converting to a

324 // file: URL. We will want to add this as well once we can compile on a	329 // file: URL. We will want to add this as well once we can compile on a

325 // system like that.	330 // system like that.

326 m_url.init(KURL(), url, strlen(url), NULL);	331 m_url.init(KURL(), url, strlen(url), 0);

327	332

328 // The one-argument constructors should never generate a NULL string.	333 // The one-argument constructors should never generate a NULL string.

329 // This is a funny quirk of KURL.cpp (probably a bug) which we preserve.	334 // This is a funny quirk of KURL.cpp (probably a bug) which we preserve.

330 if (m_url.utf8String().isNull())	335 if (m_url.utf8String().isNull())

331 m_url.setAscii("", 0);	336 m_url.setAscii("", 0);

332 }	337 }

333	338

334 // Initializes with a string representing an absolute URL. No encoding	339 // Initializes with a string representing an absolute URL. No encoding

335 // information is specified. This generally happens when a KURL is converted	340 // information is specified. This generally happens when a KURL is converted

336 // to a string and then converted back. In this case, the URL is already	341 // to a string and then converted back. In this case, the URL is already

337 // canonical and in proper escaped form so needs no encoding. We treat it as	342 // canonical and in proper escaped form so needs no encoding. We treat it as

338 // UTF-8 just in case.	343 // UTF-8 just in case.

339 KURL::KURL(const String& url)	344 KURL::KURL(const String& url)

340 {	345 {

341 if (!url.isNull())	346 if (!url.isNull())

342 m_url.init(KURL(), url, NULL);	347 m_url.init(KURL(), url, 0);

343 else {	348 else {

344 // WebCore expects us to preserve the nullness of strings when this	349 // WebCore expects us to preserve the nullness of strings when this

345 // constructor is used. In all other cases, it expects a non-null	350 // constructor is used. In all other cases, it expects a non-null

346 // empty string, which is what init() will create.	351 // empty string, which is what init() will create.

347 m_url.m_isValid = false;	352 m_url.m_isValid = false;

348 m_url.m_protocolInHTTPFamily = false;	353 m_url.m_protocolInHTTPFamily = false;

349 }	354 }

350 }	355 }

351	356

352 // Constructs a new URL given a base URL and a possibly relative input URL.	357 // Constructs a new URL given a base URL and a possibly relative input URL.

353 // This assumes UTF-8 encoding.	358 // This assumes UTF-8 encoding.

354 KURL::KURL(const KURL& base, const String& relative)	359 KURL::KURL(const KURL& base, const String& relative)

355 {	360 {

356 m_url.init(base, relative, NULL);	361 m_url.init(base, relative, 0);

357 }	362 }

358	363

359 // Constructs a new URL given a base URL and a possibly relative input URL.	364 // Constructs a new URL given a base URL and a possibly relative input URL.

360 // Any query portion of the relative URL will be encoded in the given encoding.	365 // Any query portion of the relative URL will be encoded in the given encoding.

361 KURL::KURL(const KURL& base,	366 KURL::KURL(const KURL& base,

362 const String& relative,	367 const String& relative,

363 const TextEncoding& encoding)	368 const TextEncoding& encoding)

364 {	369 {

365 m_url.init(base, relative, &encoding.encodingForFormSubmission());	370 m_url.init(base, relative, &encoding.encodingForFormSubmission());

366 }	371 }

(...skipping 13 matching lines...) Expand all Loading...
380 #if PLATFORM(CF)	385 #if PLATFORM(CF)

381 KURL::KURL(CFURLRef)	386 KURL::KURL(CFURLRef)

382 {	387 {

383 notImplemented();	388 notImplemented();

384 invalidate();	389 invalidate();

385 }	390 }

386	391

387 CFURLRef KURL::createCFURL() const	392 CFURLRef KURL::createCFURL() const

388 {	393 {

389 notImplemented();	394 notImplemented();

390 return NULL;	395 return 0;

391 }	396 }

392 #endif	397 #endif

393	398

394 KURL KURL::copy() const	399 KURL KURL::copy() const

395 {	400 {

396 KURL result = *this;	401 KURL result = *this;

397 m_url.copyTo(&result.m_url);	402 m_url.copyTo(&result.m_url);

398 return result;	403 return result;

399 }	404 }

400	405

401 bool KURL::isNull() const	406 bool KURL::isNull() const

402 {	407 {

403 return m_url.utf8String().isNull();	408 return m_url.utf8String().isNull();

404 }	409 }

405	410

406 bool KURL::isEmpty() const	411 bool KURL::isEmpty() const

407 {	412 {

408 return m_url.utf8String().length() == 0;	413 return !m_url.utf8String().length();

409 }	414 }

410	415

411 bool KURL::isValid() const	416 bool KURL::isValid() const

412 {	417 {

413 return m_url.m_isValid;	418 return m_url.m_isValid;

414 }	419 }

415	420

416 bool KURL::protocolInHTTPFamily() const	421 bool KURL::protocolInHTTPFamily() const

417 {	422 {

418 return m_url.m_protocolInHTTPFamily;	423 return m_url.m_protocolInHTTPFamily;

(...skipping 90 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
509 // FIXME determine if KURL.cpp agrees about an empty ref	514 // FIXME determine if KURL.cpp agrees about an empty ref

510 return m_url.m_parsed.ref.len >= 0;	515 return m_url.m_parsed.ref.len >= 0;

511 }	516 }

512	517

513 String KURL::query() const	518 String KURL::query() const

514 {	519 {

515 if (m_url.m_parsed.query.len >= 0) {	520 if (m_url.m_parsed.query.len >= 0) {

516 // KURL's query() includes the question mark, even though the reference	521 // KURL's query() includes the question mark, even though the reference

517 // doesn't. Move the query component backwards one to account for it	522 // doesn't. Move the query component backwards one to account for it

518 // (our library doesn't count the question mark).	523 // (our library doesn't count the question mark).

519 url_parse::Component query_comp = m_url.m_parsed.query;	524 url_parse::Component queryComp = m_url.m_parsed.query;

520 query_comp.begin--;	525 queryComp.begin--;

521 query_comp.len++;	526 queryComp.len++;

522 return m_url.componentString(query_comp);	527 return m_url.componentString(queryComp);

523 }	528 }

524	529

525 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns	530 // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns

526 // an empty string when the query is empty rather than a null (not sure	531 // an empty string when the query is empty rather than a null (not sure

527 // which is right).	532 // which is right).

528 return String("", 0);	533 return String("", 0);

529 }	534 }

530	535

531 String KURL::path() const	536 String KURL::path() const

532 {	537 {

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
728 // custom code for now. Using their version will also fix the bug that	733 // custom code for now. Using their version will also fix the bug that

729 // we ignore the encoding.	734 // we ignore the encoding.

730 //	735 //

731 // FIXME b/1350291: This does not get called very often. We just convert	736 // FIXME b/1350291: This does not get called very often. We just convert

732 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of	737 // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of

733 // sucks, and we don't use the encoding properly, which will make some	738 // sucks, and we don't use the encoding properly, which will make some

734 // obscure anchor navigations fail.	739 // obscure anchor navigations fail.

735 CString cstr = str.utf8();	740 CString cstr = str.utf8();

736	741

737 const char* input = cstr.data();	742 const char* input = cstr.data();

738 int input_length = cstr.length();	743 int inputLength = cstr.length();

739 url_canon::RawCanonOutputT<char> unescaped;	744 url_canon::RawCanonOutputT<char> unescaped;

740 for (int i = 0; i < input_length; i++) {	745 for (int i = 0; i < inputLength; i++) {

741 if (input[i] == '%') {	746 if (input[i] == '%') {

742 unsigned char ch;	747 unsigned char ch;

743 if (url_canon::DecodeEscaped(input, &i, input_length, &ch)) {	748 if (url_canon::DecodeEscaped(input, &i, inputLength, &ch)) {

744 if (ch == 0) {	749 if (!ch) {

745 // Never unescape NULLs.	750 // Never unescape NULLs.

746 unescaped.push_back('%');	751 unescaped.push_back('%');

747 unescaped.push_back('0');	752 unescaped.push_back('0');

748 unescaped.push_back('0');	753 unescaped.push_back('0');

749 } else	754 } else

750 unescaped.push_back(ch);	755 unescaped.push_back(ch);

751 } else {	756 } else {

752 // Invalid escape sequence, copy the percent literal.	757 // Invalid escape sequence, copy the percent literal.

753 unescaped.push_back('%');	758 unescaped.push_back('%');

754 }	759 }

755 } else {	760 } else {

756 // Regular non-escaped 8-bit character.	761 // Regular non-escaped 8-bit character.

757 unescaped.push_back(input[i]);	762 unescaped.push_back(input[i]);

758 }	763 }

759 }	764 }

760	765

761 // Convert that 8-bit to UTF-16. It's not clear IE does this at all to	766 // Convert that 8-bit to UTF-16. It's not clear IE does this at all to

762 // JavaScript URLs, but Firefox and Safari do.	767 // JavaScript URLs, but Firefox and Safari do.

763 url_canon::RawCanonOutputT<url_parse::UTF16Char> utf16;	768 url_canon::RawCanonOutputT<url_parse::UTF16Char> utf16;

764 for (int i = 0; i < unescaped.length(); i++) {	769 for (int i = 0; i < unescaped.length(); i++) {

765 unsigned char uch = static_cast<unsigned char>(unescaped.at(i));	770 unsigned char uch = static_cast<unsigned char>(unescaped.at(i));

766 if (uch < 0x80) {	771 if (uch < 0x80) {

767 // Non-UTF-8, just append directly	772 // Non-UTF-8, just append directly

768 utf16.push_back(uch);	773 utf16.push_back(uch);

769 } else {	774 } else {

770 // next_ch will point to the last character of the decoded	775 // next_ch will point to the last character of the decoded

771 // character.	776 // character.

772 int next_ch = i;	777 int nextCharacter = i;

773 unsigned code_point;	778 unsigned codePoint;

774 if (url_canon::ReadUTFChar(unescaped.data(), &next_ch,	779 if (url_canon::ReadUTFChar(unescaped.data(), &nextCharacter,

775 unescaped.length(), &code_point)) {	780 unescaped.length(), &codePoint)) {

776 // Valid UTF-8 character, convert to UTF-16.	781 // Valid UTF-8 character, convert to UTF-16.

777 url_canon::AppendUTF16Value(code_point, &utf16);	782 url_canon::AppendUTF16Value(codePoint, &utf16);

778 i = next_ch;	783 i = nextCharacter;

779 } else {	784 } else {

780 // KURL.cpp strips any sequences that are not valid UTF-8. This	785 // KURL.cpp strips any sequences that are not valid UTF-8. This

781 // sounds scary. Instead, we just keep those invalid code	786 // sounds scary. Instead, we just keep those invalid code

782 // points and promote to UTF-16. We copy all characters from	787 // points and promote to UTF-16. We copy all characters from

783 // the current position to the end of the identified sequence.	788 // the current position to the end of the identified sequence.

784 while (i < next_ch) {	789 while (i < nextCharacter) {

785 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));	790 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));

786 i++;	791 i++;

787 }	792 }

788 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));	793 utf16.push_back(static_cast<unsigned char>(unescaped.at(i)));

789 }	794 }

790 }	795 }

791 }	796 }

792	797

793 return String(reinterpret_cast<UChar*>(utf16.data()), utf16.length());	798 return String(reinterpret_cast<UChar*>(utf16.data()), utf16.length());

794 }	799 }

795	800

796 bool KURL::protocolIs(const char* protocol) const	801 bool KURL::protocolIs(const char* protocol) const

797 {	802 {

798 assertProtocolIsGood(protocol);	803 assertProtocolIsGood(protocol);

799 if (m_url.m_parsed.scheme.len <= 0)	804 if (m_url.m_parsed.scheme.len <= 0)

800 return protocol == NULL;	805 return !protocol;

801 return lowerCaseEqualsASCII(	806 return lowerCaseEqualsASCII(

802 m_url.utf8String().data() + m_url.m_parsed.scheme.begin,	807 m_url.utf8String().data() + m_url.m_parsed.scheme.begin,

803 m_url.utf8String().data() + m_url.m_parsed.scheme.end(),	808 m_url.utf8String().data() + m_url.m_parsed.scheme.end(),

804 protocol);	809 protocol);

805 }	810 }

806	811

807 bool KURL::isLocalFile() const	812 bool KURL::isLocalFile() const

808 {	813 {

809 return protocolIs("file");	814 return protocolIs("file");

810 }	815 }

811	816

812 // This is called to escape a URL string. It is only used externally when	817 // This is called to escape a URL string. It is only used externally when

813 // constructing mailto: links to set the query section. Since our query setter	818 // constructing mailto: links to set the query section. Since our query setter

814 // will automatically do the correct escaping, this function does not have to	819 // will automatically do the correct escaping, this function does not have to

815 // do any work.	820 // do any work.

816 //	821 //

817 // There is a possibility that a future caller may use this function in other	822 // There is a possibility that a future caller may use this function in other

818 // ways, and may expect to get a valid URL string. The dangerous thing we want	823 // ways, and may expect to get a valid URL string. The dangerous thing we want

819 // to protect against here is accidentally getting NULLs in a string that is	824 // to protect against here is accidentally getting NULLs in a string that is

820 // not supposed to have NULLs. Therefore, we escape NULLs here to prevent this.	825 // not supposed to have NULLs. Therefore, we escape NULLs here to prevent this.

821 String encodeWithURLEscapeSequences(const String& notEncodedString)	826 String encodeWithURLEscapeSequences(const String& notEncodedString)

822 {	827 {

823 CString utf8 = UTF8Encoding().encode(	828 CString utf8 = UTF8Encoding().encode(

824 reinterpret_cast<const UChar*>(notEncodedString.characters()),	829 reinterpret_cast<const UChar*>(notEncodedString.characters()),

825 notEncodedString.length(),	830 notEncodedString.length(),

826 URLEncodedEntitiesForUnencodables);	831 URLEncodedEntitiesForUnencodables);

827 const char* input = utf8.data();	832 const char* input = utf8.data();

828 int input_len = utf8.length();	833 int inputLength = utf8.length();

829	834

830 Vector<char, 2048> buffer;	835 Vector<char, 2048> buffer;

831 for (int i = 0; i < input_len; i++) {	836 for (int i = 0; i < inputLength; i++) {

832 if (input[i] == 0)	837 if (!input[i])

833 buffer.append("%00", 3);	838 buffer.append("%00", 3);

834 else	839 else

835 buffer.append(input[i]);	840 buffer.append(input[i]);

836 }	841 }

837 return String(buffer.data(), buffer.size());	842 return String(buffer.data(), buffer.size());

838 }	843 }

839	844

840 bool KURL::isHierarchical() const	845 bool KURL::isHierarchical() const

841 {	846 {

842 if (!m_url.m_parsed.scheme.is_nonempty())	847 if (!m_url.m_parsed.scheme.is_nonempty())

(...skipping 18 matching lines...) Expand all Loading...
861 m_url.m_isValid = false;	866 m_url.m_isValid = false;

862 m_url.m_protocolInHTTPFamily = false;	867 m_url.m_protocolInHTTPFamily = false;

863 }	868 }

864	869

865 // Equal up to reference fragments, if any.	870 // Equal up to reference fragments, if any.

866 bool equalIgnoringRef(const KURL& a, const KURL& b)	871 bool equalIgnoringRef(const KURL& a, const KURL& b)

867 {	872 {

868 // Compute the length of each URL without its ref. Note that the reference	873 // Compute the length of each URL without its ref. Note that the reference

869 // begin (if it exists) points to the character after the '#', so we need	874 // begin (if it exists) points to the character after the '#', so we need

870 // to subtract one.	875 // to subtract one.

871 int a_len = a.m_url.utf8String().length();	876 int aLength = a.m_url.utf8String().length();

872 if (a.m_url.m_parsed.ref.len >= 0)	877 if (a.m_url.m_parsed.ref.len >= 0)

873 a_len = a.m_url.m_parsed.ref.begin - 1;	878 aLength = a.m_url.m_parsed.ref.begin - 1;

874	879

875 int b_len = b.m_url.utf8String().length();	880 int bLength = b.m_url.utf8String().length();

876 if (b.m_url.m_parsed.ref.len >= 0)	881 if (b.m_url.m_parsed.ref.len >= 0)

877 b_len = b.m_url.m_parsed.ref.begin - 1;	882 bLength = b.m_url.m_parsed.ref.begin - 1;

878	883

879 return a_len == b_len	884 return aLength == bLength

880 && strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), a_len) == 0;	885 && !strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), aLength);

881 }	886 }

882	887

883 unsigned KURL::hostStart() const	888 unsigned KURL::hostStart() const

884 {	889 {

885 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false);	890 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false);

886 }	891 }

887	892

888 unsigned KURL::hostEnd() const	893 unsigned KURL::hostEnd() const

889 {	894 {

890 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true);	895 return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true);

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
931 }	936 }

932	937

933 inline bool KURL::protocolIs(const String& string, const char* protocol)	938 inline bool KURL::protocolIs(const String& string, const char* protocol)

934 {	939 {

935 return WebCore::protocolIs(string, protocol);	940 return WebCore::protocolIs(string, protocol);

936 }	941 }

937	942

938 } // namespace WebCore	943 } // namespace WebCore

939	944

940 #endif // USE(GOOGLEURL)	945 #endif // USE(GOOGLEURL)

OLD	NEW

« no previous file with comments | « third_party/WebKit/WebCore/platform/KURL.h ('k') | third_party/WebKit/WebCore/platform/KURLGooglePrivate.h » ('j') | no next file with comments »