gurl.h - Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a.

Side by Side Diff: gurl.h

Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a. (Closed) Base URL: https://chromium.googlesource.com/external/github.com/domokit/gurl@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef URL_GURL_H_	5 #ifndef URL_GURL_H_

6 #define URL_GURL_H_	6 #define URL_GURL_H_

7	7

8 #include <iosfwd>	8 #include <iosfwd>

9 #include <string>	9 #include <string>

10	10

11 #include "base/memory/scoped_ptr.h"	11 #include "base/memory/scoped_ptr.h"

12 #include "base/strings/string16.h"	12 #include "base/strings/string16.h"

	13 #include "base/strings/string_piece.h"

	14 #include "url/third_party/mozilla/url_parse.h"

13 #include "url/url_canon.h"	15 #include "url/url_canon.h"

14 #include "url/url_canon_stdstring.h"	16 #include "url/url_canon_stdstring.h"

15 #include "url/url_constants.h"	17 #include "url/url_constants.h"

16 #include "url/url_export.h"	18 #include "url/url_export.h"

17 #include "url/url_parse.h"

18	19

19 class URL_EXPORT GURL {	20 class URL_EXPORT GURL {

20 public:	21 public:

21 typedef url::StringPieceReplacements<std::string> Replacements;	22 typedef url::StringPieceReplacements<std::string> Replacements;

22 typedef url::StringPieceReplacements<base::string16> ReplacementsW;	23 typedef url::StringPieceReplacements<base::string16> ReplacementsW;

23	24

24 // Creates an empty, invalid URL.	25 // Creates an empty, invalid URL.

25 GURL();	26 GURL();

26	27

27 // Copy construction is relatively inexpensive, with most of the time going	28 // Copy construction is relatively inexpensive, with most of the time going

(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
84 // The exception is for empty() URLs (which are !is_valid()) but this will	85 // The exception is for empty() URLs (which are !is_valid()) but this will

85 // return the empty string without asserting.	86 // return the empty string without asserting.

86 //	87 //

87 // Used invalid_spec() below to get the unusable spec of an invalid URL. This	88 // Used invalid_spec() below to get the unusable spec of an invalid URL. This

88 // separation is designed to prevent errors that may cause security problems	89 // separation is designed to prevent errors that may cause security problems

89 // that could result from the mistaken use of an invalid URL.	90 // that could result from the mistaken use of an invalid URL.

90 const std::string& spec() const;	91 const std::string& spec() const;

91	92

92 // Returns the potentially invalid spec for a the URL. This spec MUST NOT be	93 // Returns the potentially invalid spec for a the URL. This spec MUST NOT be

93 // modified or sent over the network. It is designed to be displayed in error	94 // modified or sent over the network. It is designed to be displayed in error

94 // messages to the user, as the apperance of the spec may explain the error.	95 // messages to the user, as the appearance of the spec may explain the error.

95 // If the spec is valid, the valid spec will be returned.	96 // If the spec is valid, the valid spec will be returned.

96 //	97 //

97 // The returned string is guaranteed to be valid UTF-8.	98 // The returned string is guaranteed to be valid UTF-8.

98 const std::string& possibly_invalid_spec() const {	99 const std::string& possibly_invalid_spec() const {

99 return spec_;	100 return spec_;

100 }	101 }

101	102

102 // Getter for the raw parsed structure. This allows callers to locate parts	103 // Getter for the raw parsed structure. This allows callers to locate parts

103 // of the URL within the spec themselves. Most callers should consider using	104 // of the URL within the spec themselves. Most callers should consider using

104 // the individual component getters below.	105 // the individual component getters below.

(...skipping 12 matching lines...) Expand all Loading...
117	118

118 // Allows GURL to used as a key in STL (for example, a std::set or std::map).	119 // Allows GURL to used as a key in STL (for example, a std::set or std::map).

119 bool operator<(const GURL& other) const;	120 bool operator<(const GURL& other) const;

120 bool operator>(const GURL& other) const;	121 bool operator>(const GURL& other) const;

121	122

122 // Resolves a URL that's possibly relative to this object's URL, and returns	123 // Resolves a URL that's possibly relative to this object's URL, and returns

123 // it. Absolute URLs are also handled according to the rules of URLs on web	124 // it. Absolute URLs are also handled according to the rules of URLs on web

124 // pages.	125 // pages.

125 //	126 //

126 // It may be impossible to resolve the URLs properly. If the input is not	127 // It may be impossible to resolve the URLs properly. If the input is not

127 // "standard" (SchemeIsStandard() == false) and the input looks relative, we	128 // "standard" (IsStandard() == false) and the input looks relative, we can't

128 // can't resolve it. In these cases, the result will be an empty, invalid	129 // resolve it. In these cases, the result will be an empty, invalid GURL.

129 // GURL.

130 //	130 //

131 // The result may also be a nonempty, invalid URL if the input has some kind	131 // The result may also be a nonempty, invalid URL if the input has some kind

132 // of encoding error. In these cases, we will try to construct a "good" URL	132 // of encoding error. In these cases, we will try to construct a "good" URL

133 // that may have meaning to the user, but it will be marked invalid.	133 // that may have meaning to the user, but it will be marked invalid.

134 //	134 //

135 // It is an error to resolve a URL relative to an invalid URL. The result	135 // It is an error to resolve a URL relative to an invalid URL. The result

136 // will be the empty URL.	136 // will be the empty URL.

137 GURL Resolve(const std::string& relative) const;	137 GURL Resolve(const std::string& relative) const;

138 GURL Resolve(const base::string16& relative) const;	138 GURL Resolve(const base::string16& relative) const;

139	139

140 // Like Resolve() above but takes a character set encoder which will be used

141 // for any query text specified in the input. The charset converter parameter

142 // may be NULL, in which case it will be treated as UTF-8.

143 //

144 // TODO(brettw): These should be replaced with versions that take something

145 // more friendly than a raw CharsetConverter (maybe like an ICU character set

146 // name).

147 GURL ResolveWithCharsetConverter(

148 const std::string& relative,

149 url::CharsetConverter* charset_converter) const;

150 GURL ResolveWithCharsetConverter(

151 const base::string16& relative,

152 url::CharsetConverter* charset_converter) const;

153

154 // Creates a new GURL by replacing the current URL's components with the	140 // Creates a new GURL by replacing the current URL's components with the

155 // supplied versions. See the Replacements class in url_canon.h for more.	141 // supplied versions. See the Replacements class in url_canon.h for more.

156 //	142 //

157 // These are not particularly quick, so avoid doing mutations when possible.	143 // These are not particularly quick, so avoid doing mutations when possible.

158 // Prefer the 8-bit version when possible.	144 // Prefer the 8-bit version when possible.

159 //	145 //

160 // It is an error to replace components of an invalid URL. The result will	146 // It is an error to replace components of an invalid URL. The result will

161 // be the empty URL.	147 // be the empty URL.

162 //	148 //

163 // Note that we use the more general url::Replacements type to give	149 // Note that we use the more general url::Replacements type to give

(...skipping 23 matching lines...) Expand all Loading...
187 // It is an error to get the origin of an invalid URL. The result	173 // It is an error to get the origin of an invalid URL. The result

188 // will be the empty URL.	174 // will be the empty URL.

189 GURL GetOrigin() const;	175 GURL GetOrigin() const;

190	176

191 // A helper function to return a GURL stripped from the elements that are not	177 // A helper function to return a GURL stripped from the elements that are not

192 // supposed to be sent as HTTP referrer: username, password and ref fragment.	178 // supposed to be sent as HTTP referrer: username, password and ref fragment.

193 // For invalid URLs or URLs that no valid referrers, an empty URL will be	179 // For invalid URLs or URLs that no valid referrers, an empty URL will be

194 // returned.	180 // returned.

195 GURL GetAsReferrer() const;	181 GURL GetAsReferrer() const;

196	182

197 // Returns true if the scheme for the current URL is a known "standard"	183 // Returns true if the scheme for the current URL is a known "standard-format"

198 // scheme. Standard schemes have an authority and a path section. This	184 // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic

199 // includes file: and filesystem:, which some callers may want to filter out	185 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes

200 // explicitly by calling SchemeIsFile[System].	186 // file: and filesystem:, which some callers may want to filter out explicitly

	187 // by calling SchemeIsFile[System].

201 bool IsStandard() const;	188 bool IsStandard() const;

202	189

203 // Returns true if the given parameter (should be lower-case ASCII to match	190 // Returns true if the given parameter (should be lower-case ASCII to match

204 // the canonicalized scheme) is the scheme for this URL. This call is more	191 // the canonicalized scheme) is the scheme for this URL. This call is more

205 // efficient than getting the scheme and comparing it because no copies or	192 // efficient than getting the scheme and comparing it because no copies or

206 // object constructions are done.	193 // object constructions are done.

207 bool SchemeIs(const char* lower_ascii_scheme) const;	194 bool SchemeIs(const char* lower_ascii_scheme) const;

208	195

209 // Returns true if the scheme is "http" or "https".	196 // Returns true if the scheme is "http" or "https".

210 bool SchemeIsHTTPOrHTTPS() const;	197 bool SchemeIsHTTPOrHTTPS() const;

211	198

212 // Returns true is the scheme is "ws" or "wss".	199 // Returns true is the scheme is "ws" or "wss".

213 bool SchemeIsWSOrWSS() const;	200 bool SchemeIsWSOrWSS() const;

214	201

215 // We often need to know if this is a file URL. File URLs are "standard", but	202 // We often need to know if this is a file URL. File URLs are "standard", but

216 // are often treated separately by some programs.	203 // are often treated separately by some programs.

217 bool SchemeIsFile() const {	204 bool SchemeIsFile() const {

218 return SchemeIs(url::kFileScheme);	205 return SchemeIs(url::kFileScheme);

219 }	206 }

220	207

221 // FileSystem URLs need to be treated differently in some cases.	208 // FileSystem URLs need to be treated differently in some cases.

222 bool SchemeIsFileSystem() const {	209 bool SchemeIsFileSystem() const {

223 return SchemeIs(url::kFileSystemScheme);	210 return SchemeIs(url::kFileSystemScheme);

224 }	211 }

225	212

226 // If the scheme indicates a secure connection	213 // Returns true if the scheme indicates a secure connection.

	214 //

	215 // NOTE: This function is deprecated. You probably want

	216 // |SchemeIsCryptographic| (if you just want to know if a scheme uses TLS for

	217 // network transport) or Chromium's |IsOriginSecure| for a higher-level test

	218 // about an origin's security. See those functions' documentation for more

	219 // detail.

	220 //

	221 // TODO(palmer): Audit callers and change them to |SchemeIsCryptographic| or

	222 // |IsOriginSecure|, as appropriate. Then remove |SchemeIsSecure|.

	223 // crbug.com/362214

227 bool SchemeIsSecure() const {	224 bool SchemeIsSecure() const {

228 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||	225 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||

229 (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());	226 (SchemeIsFileSystem() && inner_url() &&

	227 inner_url()->SchemeIsSecure());

	228 }

	229

	230 // Returns true if the scheme indicates a network connection that uses TLS or

	231 // some other cryptographic protocol (e.g. QUIC) for security.

	232 //

	233 // This function is a not a complete test of whether or not an origin's code

	234 // is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for a

	235 // higher-level and more complete semantics. See that function's documentation

	236 // for more detail.

	237 bool SchemeIsCryptographic() const {

	238 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);

230 }	239 }

231	240

232 // Returns true if the scheme is "blob".	241 // Returns true if the scheme is "blob".

233 bool SchemeIsBlob() const {	242 bool SchemeIsBlob() const {

234 return SchemeIs(url::kBlobScheme);	243 return SchemeIs(url::kBlobScheme);

235 }	244 }

236	245

237 // The "content" of the URL is everything after the scheme (skipping the	246 // The "content" of the URL is everything after the scheme (skipping the

238 // scheme delimiting colon). It is an error to get the origin of an invalid	247 // scheme delimiting colon). It is an error to get the content of an invalid

239 // URL. The result will be an empty string.	248 // URL: the result will be an empty string.

240 std::string GetContent() const;	249 std::string GetContent() const;

241	250

242 // Returns true if the hostname is an IP address. Note: this function isn't	251 // Returns true if the hostname is an IP address. Note: this function isn't

243 // as cheap as a simple getter because it re-parses the hostname to verify.	252 // as cheap as a simple getter because it re-parses the hostname to verify.

244 // This currently identifies only IPv4 addresses (bug 822685).

245 bool HostIsIPAddress() const;	253 bool HostIsIPAddress() const;

246	254

247 // Getters for various components of the URL. The returned string will be	255 // Getters for various components of the URL. The returned string will be

248 // empty if the component is empty or is not present.	256 // empty if the component is empty or is not present.

249 std::string scheme() const { // Not including the colon. See also SchemeIs.	257 std::string scheme() const { // Not including the colon. See also SchemeIs.

250 return ComponentString(parsed_.scheme);	258 return ComponentString(parsed_.scheme);

251 }	259 }

252 std::string username() const {	260 std::string username() const {

253 return ComponentString(parsed_.username);	261 return ComponentString(parsed_.username);

254 }	262 }

(...skipping 12 matching lines...) Expand all Loading...
267 std::string path() const { // Including first slash following host	275 std::string path() const { // Including first slash following host

268 return ComponentString(parsed_.path);	276 return ComponentString(parsed_.path);

269 }	277 }

270 std::string query() const { // Stuff following '?'	278 std::string query() const { // Stuff following '?'

271 return ComponentString(parsed_.query);	279 return ComponentString(parsed_.query);

272 }	280 }

273 std::string ref() const { // Stuff following '#'	281 std::string ref() const { // Stuff following '#'

274 return ComponentString(parsed_.ref);	282 return ComponentString(parsed_.ref);

275 }	283 }

276	284

277 // Existance querying. These functions will return true if the corresponding	285 // Existence querying. These functions will return true if the corresponding

278 // URL component exists in this URL. Note that existance is different than	286 // URL component exists in this URL. Note that existence is different than

279 // being nonempty. http://www.google.com/? has a query that just happens to	287 // being nonempty. http://www.google.com/? has a query that just happens to

280 // be empty, and has_query() will return true.	288 // be empty, and has_query() will return true.

281 bool has_scheme() const {	289 bool has_scheme() const {

282 return parsed_.scheme.len >= 0;	290 return parsed_.scheme.len >= 0;

283 }	291 }

284 bool has_username() const {	292 bool has_username() const {

285 return parsed_.username.len >= 0;	293 return parsed_.username.len >= 0;

286 }	294 }

287 bool has_password() const {	295 bool has_password() const {

288 return parsed_.password.len >= 0;	296 return parsed_.password.len >= 0;

289 }	297 }

290 bool has_host() const {	298 bool has_host() const {

291 // Note that hosts are special, absense of host means length 0.	299 // Note that hosts are special, absence of host means length 0.

292 return parsed_.host.len > 0;	300 return parsed_.host.len > 0;

293 }	301 }

294 bool has_port() const {	302 bool has_port() const {

295 return parsed_.port.len >= 0;	303 return parsed_.port.len >= 0;

296 }	304 }

297 bool has_path() const {	305 bool has_path() const {

298 // Note that http://www.google.com/" has a path, the path is "/". This can	306 // Note that http://www.google.com/" has a path, the path is "/". This can

299 // return false only for invalid or nonstandard URLs.	307 // return false only for invalid or nonstandard URLs.

300 return parsed_.path.len >= 0;	308 return parsed_.path.len >= 0;

301 }	309 }

302 bool has_query() const {	310 bool has_query() const {

303 return parsed_.query.len >= 0;	311 return parsed_.query.len >= 0;

304 }	312 }

305 bool has_ref() const {	313 bool has_ref() const {

306 return parsed_.ref.len >= 0;	314 return parsed_.ref.len >= 0;

307 }	315 }

308	316

309 // Returns a parsed version of the port. Can also be any of the special	317 // Returns a parsed version of the port. Can also be any of the special

310 // values defined in Parsed for ExtractPort.	318 // values defined in Parsed for ExtractPort.

311 int IntPort() const;	319 int IntPort() const;

312	320

313 // Returns the port number of the url, or the default port number.	321 // Returns the port number of the URL, or the default port number.

314 // If the scheme has no concept of port (or unknown default) returns	322 // If the scheme has no concept of port (or unknown default) returns

315 // PORT_UNSPECIFIED.	323 // PORT_UNSPECIFIED.

316 int EffectiveIntPort() const;	324 int EffectiveIntPort() const;

317	325

318 // Extracts the filename portion of the path and returns it. The filename	326 // Extracts the filename portion of the path and returns it. The filename

319 // is everything after the last slash in the path. This may be empty.	327 // is everything after the last slash in the path. This may be empty.

320 std::string ExtractFileName() const;	328 std::string ExtractFileName() const;

321	329

322 // Returns the path that should be sent to the server. This is the path,	330 // Returns the path that should be sent to the server. This is the path,

323 // parameter, and query portions of the URL. It is guaranteed to be ASCII.	331 // parameter, and query portions of the URL. It is guaranteed to be ASCII.

324 std::string PathForRequest() const;	332 std::string PathForRequest() const;

325	333

326 // Returns the host, excluding the square brackets surrounding IPv6 address	334 // Returns the host, excluding the square brackets surrounding IPv6 address

327 // literals. This can be useful for passing to getaddrinfo().	335 // literals. This can be useful for passing to getaddrinfo().

328 std::string HostNoBrackets() const;	336 std::string HostNoBrackets() const;

329	337

330 // Returns true if this URL's host matches or is in the same domain as	338 // Returns true if this URL's host matches or is in the same domain as

331 // the given input string. For example if this URL was "www.google.com",	339 // the given input string. For example, if the hostname of the URL is

332 // this would match "com", "google.com", and "www.google.com	340 // "www.google.com", this will return true for "com", "google.com", and

333 // (input domain should be lower-case ASCII to match the canonicalized	341 // "www.google.com".

334 // scheme). This call is more efficient than getting the host and check	342 //

	343 // The input domain should be lower-case ASCII to match the canonicalized

	344 // scheme. This call is more efficient than getting the host and check

335 // whether host has the specific domain or not because no copies or	345 // whether host has the specific domain or not because no copies or

336 // object constructions are done.	346 // object constructions are done.

337 //	347 bool DomainIs(base::StringPiece lower_ascii_domain) const;

338 // If function DomainIs has parameter domain_len, which means the parameter

339 // lower_ascii_domain does not gurantee to terminate with NULL character.

340 bool DomainIs(const char* lower_ascii_domain, int domain_len) const;

341	348

342 // If function DomainIs only has parameter lower_ascii_domain, which means	349 // Swaps the contents of this GURL object with |other|, without doing

343 // domain string should be terminate with NULL character.

344 bool DomainIs(const char* lower_ascii_domain) const {

345 return DomainIs(lower_ascii_domain,

346 static_cast<int>(strlen(lower_ascii_domain)));

347 }

348

349 // Swaps the contents of this GURL object with the argument without doing

350 // any memory allocations.	350 // any memory allocations.

351 void Swap(GURL* other);	351 void Swap(GURL* other);

352	352

353 // Returns a reference to a singleton empty GURL. This object is for callers	353 // Returns a reference to a singleton empty GURL. This object is for callers

354 // who return references but don't have anything to return in some cases.	354 // who return references but don't have anything to return in some cases.

355 // This function may be called from any thread.	355 // This function may be called from any thread.

356 static const GURL& EmptyGURL();	356 static const GURL& EmptyGURL();

357	357

358 // Returns the inner URL of a nested URL [currently only non-null for	358 // Returns the inner URL of a nested URL [currently only non-null for

359 // filesystem: URLs].	359 // filesystem: URLs].

360 const GURL* inner_url() const {	360 const GURL* inner_url() const {

361 return inner_url_.get();	361 return inner_url_.get();

362 }	362 }

363	363

364 private:	364 private:

365 // Variant of the string parsing constructor that allows the caller to elect	365 // Variant of the string parsing constructor that allows the caller to elect

366 // retain trailing whitespace, if any, on the passed URL spec but only if the	366 // retain trailing whitespace, if any, on the passed URL spec, but only if

367 // scheme is one that allows trailing whitespace. The primary use-case is	367 // the scheme is one that allows trailing whitespace. The primary use-case is

368 // for data: URLs. In most cases, you want to use the single parameter	368 // for data: URLs. In most cases, you want to use the single parameter

369 // constructor above.	369 // constructor above.

370 enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };	370 enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };

371 GURL(const std::string& url_string, RetainWhiteSpaceSelector);	371 GURL(const std::string& url_string, RetainWhiteSpaceSelector);

372	372

373 template<typename STR>	373 template<typename STR>

374 void InitCanonical(const STR& input_spec, bool trim_path_end);	374 void InitCanonical(const STR& input_spec, bool trim_path_end);

375	375

376 void InitializeFromCanonicalSpec();	376 void InitializeFromCanonicalSpec();

377	377

(...skipping 18 matching lines...) Expand all Loading...
396 // Used for nested schemes [currently only filesystem:].	396 // Used for nested schemes [currently only filesystem:].

397 scoped_ptr<GURL> inner_url_;	397 scoped_ptr<GURL> inner_url_;

398	398

399 // TODO bug 684583: Add encoding for query params.	399 // TODO bug 684583: Add encoding for query params.

400 };	400 };

401	401

402 // Stream operator so GURL can be used in assertion statements.	402 // Stream operator so GURL can be used in assertion statements.

403 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);	403 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);

404	404

405 #endif // URL_GURL_H_	405 #endif // URL_GURL_H_

OLD	NEW

« no previous file with comments | « android/java/src/org/chromium/url/IDNStringUtil.java ('k') | gurl.cc » ('j') | no next file with comments »