trunk/src/url/url_canon.h - Issue 15799007: Revert 203027 "Revert 203025 "Make the copy of GURL in src/url b..."

Side by Side Diff: trunk/src/url/url_canon.h

Issue 15799007: Revert 203027 "Revert 203025 "Make the copy of GURL in src/url b..." (Closed) Base URL: svn://svn.chromium.org/chrome/

Patch Set: Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef URL_URL_CANON_H_	5 #ifndef URL_URL_CANON_H_

6 #define URL_URL_CANON_H_	6 #define URL_URL_CANON_H_

7	7

8 #include <stdlib.h>	8 #include <stdlib.h>

9 #include <string.h>	9 #include <string.h>

10	10

11 #include "base/string16.h"	11 #include "base/string16.h"

	12 #include "url/url_export.h"

12 #include "url/url_parse.h"	13 #include "url/url_parse.h"

13	14

14 namespace url_canon {	15 namespace url_canon {

15	16

16 // Canonicalizer output -------------------------------------------------------	17 // Canonicalizer output -------------------------------------------------------

17	18

18 // Base class for the canonicalizer output, this maintains a buffer and	19 // Base class for the canonicalizer output, this maintains a buffer and

19 // supports simple resizing and append operations on it.	20 // supports simple resizing and append operations on it.

20 //	21 //

21 // It is VERY IMPORTANT that no virtual function calls be made on the common	22 // It is VERY IMPORTANT that no virtual function calls be made on the common

(...skipping 157 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
179 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};	180 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};

180	181

181 // Character set converter ----------------------------------------------------	182 // Character set converter ----------------------------------------------------

182 //	183 //

183 // Converts query strings into a custom encoding. The embedder can supply an	184 // Converts query strings into a custom encoding. The embedder can supply an

184 // implementation of this class to interface with their own character set	185 // implementation of this class to interface with their own character set

185 // conversion libraries.	186 // conversion libraries.

186 //	187 //

187 // Embedders will want to see the unit test for the ICU version.	188 // Embedders will want to see the unit test for the ICU version.

188	189

189 class CharsetConverter {	190 class URL_EXPORT CharsetConverter {

190 public:	191 public:

191 CharsetConverter() {}	192 CharsetConverter() {}

192 virtual ~CharsetConverter() {}	193 virtual ~CharsetConverter() {}

193	194

194 // Converts the given input string from UTF-16 to whatever output format the	195 // Converts the given input string from UTF-16 to whatever output format the

195 // converter supports. This is used only for the query encoding conversion,	196 // converter supports. This is used only for the query encoding conversion,

196 // which does not fail. Instead, the converter should insert "invalid	197 // which does not fail. Instead, the converter should insert "invalid

197 // character" characters in the output for invalid sequences, and do the	198 // character" characters in the output for invalid sequences, and do the

198 // best it can.	199 // best it can.

199 //	200 //

(...skipping 17 matching lines...) Expand all Loading...
217 // This should be called before parsing if whitespace removal is desired (which	218 // This should be called before parsing if whitespace removal is desired (which

218 // it normally is when you are canonicalizing).	219 // it normally is when you are canonicalizing).

219 //	220 //

220 // If no whitespace is removed, this function will not use the buffer and will	221 // If no whitespace is removed, this function will not use the buffer and will

221 // return a pointer to the input, to avoid the extra copy. If modification is	222 // return a pointer to the input, to avoid the extra copy. If modification is

222 // required, the given \|buffer\| will be used and the returned pointer will	223 // required, the given \|buffer\| will be used and the returned pointer will

223 // point to the beginning of the buffer.	224 // point to the beginning of the buffer.

224 //	225 //

225 // Therefore, callers should not use the buffer, since it may actually be empty;	226 // Therefore, callers should not use the buffer, since it may actually be empty;

226 // use the computed pointer and \|*output_len\| instead.	227 // use the computed pointer and \|*output_len\| instead.

227 const char* RemoveURLWhitespace(const char* input, int input_len,	228 URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len,

228 CanonOutputT<char>* buffer,	229 CanonOutputT<char>* buffer,

229 int* output_len);	230 int* output_len);

230 const char16* RemoveURLWhitespace(const char16* input, int input_len,	231 URL_EXPORT const char16* RemoveURLWhitespace(const char16* input, int input_len,

231 CanonOutputT<char16>* buffer,	232 CanonOutputT<char16>* buffer,

232 int* output_len);	233 int* output_len);

233	234

234 // IDN ------------------------------------------------------------------------	235 // IDN ------------------------------------------------------------------------

235	236

236 // Converts the Unicode input representing a hostname to ASCII using IDN rules.	237 // Converts the Unicode input representing a hostname to ASCII using IDN rules.

237 // The output must fall in the ASCII range, but will be encoded in UTF-16.	238 // The output must fall in the ASCII range, but will be encoded in UTF-16.

238 //	239 //

239 // On success, the output will be filled with the ASCII host name and it will	240 // On success, the output will be filled with the ASCII host name and it will

240 // return true. Unlike most other canonicalization functions, this assumes that	241 // return true. Unlike most other canonicalization functions, this assumes that

241 // the output is empty. The beginning of the host will be at offset 0, and	242 // the output is empty. The beginning of the host will be at offset 0, and

242 // the length of the output will be set to the length of the new host name.	243 // the length of the output will be set to the length of the new host name.

243 //	244 //

244 // On error, returns false. The output in this case is undefined.	245 // On error, returns false. The output in this case is undefined.

245 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);	246 URL_EXPORT bool IDNToASCII(const char16* src,

	247 int src_len,

	248 CanonOutputW* output);

246	249

247 // Piece-by-piece canonicalizers ----------------------------------------------	250 // Piece-by-piece canonicalizers ----------------------------------------------

248 //	251 //

249 // These individual canonicalizers append the canonicalized versions of the	252 // These individual canonicalizers append the canonicalized versions of the

250 // corresponding URL component to the given std::string. The spec and the	253 // corresponding URL component to the given std::string. The spec and the

251 // previously-identified range of that component are the input. The range of	254 // previously-identified range of that component are the input. The range of

252 // the canonicalized component will be written to the output component.	255 // the canonicalized component will be written to the output component.

253 //	256 //

254 // These functions all append to the output so they can be chained. Make sure	257 // These functions all append to the output so they can be chained. Make sure

255 // the output is empty when you start.	258 // the output is empty when you start.

256 //	259 //

257 // These functions return boolean values indicating success. On failure, they	260 // These functions return boolean values indicating success. On failure, they

258 // will attempt to write something reasonable to the output so that, if	261 // will attempt to write something reasonable to the output so that, if

259 // displayed to the user, they will recognise it as something that's messed up.	262 // displayed to the user, they will recognise it as something that's messed up.

260 // Nothing more should ever be done with these invalid URLs, however.	263 // Nothing more should ever be done with these invalid URLs, however.

261	264

262 // Scheme: Appends the scheme and colon to the URL. The output component will	265 // Scheme: Appends the scheme and colon to the URL. The output component will

263 // indicate the range of characters up to but not including the colon.	266 // indicate the range of characters up to but not including the colon.

264 //	267 //

265 // Canonical URLs always have a scheme. If the scheme is not present in the	268 // Canonical URLs always have a scheme. If the scheme is not present in the

266 // input, this will just write the colon to indicate an empty scheme. Does not	269 // input, this will just write the colon to indicate an empty scheme. Does not

267 // append slashes which will be needed before any authority components for most	270 // append slashes which will be needed before any authority components for most

268 // URLs.	271 // URLs.

269 //	272 //

270 // The 8-bit version requires UTF-8 encoding.	273 // The 8-bit version requires UTF-8 encoding.

271 bool CanonicalizeScheme(const char* spec,	274 URL_EXPORT bool CanonicalizeScheme(const char* spec,

272 const url_parse::Component& scheme,	275 const url_parse::Component& scheme,

273 CanonOutput* output,	276 CanonOutput* output,

274 url_parse::Component* out_scheme);	277 url_parse::Component* out_scheme);

275 bool CanonicalizeScheme(const char16* spec,	278 URL_EXPORT bool CanonicalizeScheme(const char16* spec,

276 const url_parse::Component& scheme,	279 const url_parse::Component& scheme,

277 CanonOutput* output,	280 CanonOutput* output,

278 url_parse::Component* out_scheme);	281 url_parse::Component* out_scheme);

279	282

280 // User info: username/password. If present, this will add the delimiters so	283 // User info: username/password. If present, this will add the delimiters so

281 // the output will be "<username>:<password>@" or "<username>@". Empty	284 // the output will be "<username>:<password>@" or "<username>@". Empty

282 // username/password pairs, or empty passwords, will get converted to	285 // username/password pairs, or empty passwords, will get converted to

283 // nonexistent in the canonical version.	286 // nonexistent in the canonical version.

284 //	287 //

285 // The components for the username and password refer to ranges in the	288 // The components for the username and password refer to ranges in the

286 // respective source strings. Usually, these will be the same string, which	289 // respective source strings. Usually, these will be the same string, which

287 // is legal as long as the two components don't overlap.	290 // is legal as long as the two components don't overlap.

288 //	291 //

289 // The 8-bit version requires UTF-8 encoding.	292 // The 8-bit version requires UTF-8 encoding.

290 bool CanonicalizeUserInfo(const char* username_source,	293 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,

291 const url_parse::Component& username,	294 const url_parse::Component& username,

292 const char* password_source,	295 const char* password_source,

293 const url_parse::Component& password,	296 const url_parse::Component& password,

294 CanonOutput* output,	297 CanonOutput* output,

295 url_parse::Component* out_username,	298 url_parse::Component* out_username,

296 url_parse::Component* out_password);	299 url_parse::Component* out_password);

297 bool CanonicalizeUserInfo(const char16* username_source,	300 URL_EXPORT bool CanonicalizeUserInfo(const char16* username_source,

298 const url_parse::Component& username,	301 const url_parse::Component& username,

299 const char16* password_source,	302 const char16* password_source,

300 const url_parse::Component& password,	303 const url_parse::Component& password,

301 CanonOutput* output,	304 CanonOutput* output,

302 url_parse::Component* out_username,	305 url_parse::Component* out_username,

303 url_parse::Component* out_password);	306 url_parse::Component* out_password);

304	307

305	308

306 // This structure holds detailed state exported from the IP/Host canonicalizers.	309 // This structure holds detailed state exported from the IP/Host canonicalizers.

307 // Additional fields may be added as callers require them.	310 // Additional fields may be added as callers require them.

308 struct CanonHostInfo {	311 struct CanonHostInfo {

309 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}	312 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}

310	313

311 // Convenience function to test if family is an IP address.	314 // Convenience function to test if family is an IP address.

312 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }	315 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }

313	316

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
346 int AddressLength() const {	349 int AddressLength() const {

347 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);	350 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);

348 }	351 }

349 };	352 };

350	353

351	354

352 // Host.	355 // Host.

353 //	356 //

354 // The 8-bit version requires UTF-8 encoding. Use this version when you only	357 // The 8-bit version requires UTF-8 encoding. Use this version when you only

355 // need to know whether canonicalization succeeded.	358 // need to know whether canonicalization succeeded.

356 bool CanonicalizeHost(const char* spec,	359 URL_EXPORT bool CanonicalizeHost(const char* spec,

357 const url_parse::Component& host,	360 const url_parse::Component& host,

358 CanonOutput* output,	361 CanonOutput* output,

359 url_parse::Component* out_host);	362 url_parse::Component* out_host);

360 bool CanonicalizeHost(const char16* spec,	363 URL_EXPORT bool CanonicalizeHost(const char16* spec,

361 const url_parse::Component& host,	364 const url_parse::Component& host,

362 CanonOutput* output,	365 CanonOutput* output,

363 url_parse::Component* out_host);	366 url_parse::Component* out_host);

364	367

365 // Extended version of CanonicalizeHost, which returns additional information.	368 // Extended version of CanonicalizeHost, which returns additional information.

366 // Use this when you need to know whether the hostname was an IP address.	369 // Use this when you need to know whether the hostname was an IP address.

367 // A successful return is indicated by host_info->family != BROKEN. See the	370 // A successful return is indicated by host_info->family != BROKEN. See the

368 // definition of CanonHostInfo above for details.	371 // definition of CanonHostInfo above for details.

369 void CanonicalizeHostVerbose(const char* spec,	372 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,

370 const url_parse::Component& host,	373 const url_parse::Component& host,

371 CanonOutput* output,	374 CanonOutput* output,

372 CanonHostInfo* host_info);	375 CanonHostInfo* host_info);

373 void CanonicalizeHostVerbose(const char16* spec,	376 URL_EXPORT void CanonicalizeHostVerbose(const char16* spec,

374 const url_parse::Component& host,	377 const url_parse::Component& host,

375 CanonOutput* output,	378 CanonOutput* output,

376 CanonHostInfo* host_info);	379 CanonHostInfo* host_info);

377	380

378	381

379 // IP addresses.	382 // IP addresses.

380 //	383 //

381 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is	384 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is

382 // an IP address, it will canonicalize it as such, appending it to \|output\|.	385 // an IP address, it will canonicalize it as such, appending it to \|output\|.

383 // Additional status information is returned via the \|*host_info\| parameter.	386 // Additional status information is returned via the \|*host_info\| parameter.

384 // See the definition of CanonHostInfo above for details.	387 // See the definition of CanonHostInfo above for details.

385 //	388 //

386 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that	389 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that

387 // the input is unescaped and name-prepped, etc. It should not normally be	390 // the input is unescaped and name-prepped, etc. It should not normally be

388 // necessary or wise to call this directly.	391 // necessary or wise to call this directly.

389 void CanonicalizeIPAddress(const char* spec,	392 URL_EXPORT void CanonicalizeIPAddress(const char* spec,

390 const url_parse::Component& host,	393 const url_parse::Component& host,

391 CanonOutput* output,	394 CanonOutput* output,

392 CanonHostInfo* host_info);	395 CanonHostInfo* host_info);

393 void CanonicalizeIPAddress(const char16* spec,	396 URL_EXPORT void CanonicalizeIPAddress(const char16* spec,

394 const url_parse::Component& host,	397 const url_parse::Component& host,

395 CanonOutput* output,	398 CanonOutput* output,

396 CanonHostInfo* host_info);	399 CanonHostInfo* host_info);

397	400

398 // Port: this function will add the colon for the port if a port is present.	401 // Port: this function will add the colon for the port if a port is present.

399 // The caller can pass url_parse::PORT_UNSPECIFIED as the	402 // The caller can pass url_parse::PORT_UNSPECIFIED as the

400 // default_port_for_scheme argument if there is no default port.	403 // default_port_for_scheme argument if there is no default port.

401 //	404 //

402 // The 8-bit version requires UTF-8 encoding.	405 // The 8-bit version requires UTF-8 encoding.

403 bool CanonicalizePort(const char* spec,	406 URL_EXPORT bool CanonicalizePort(const char* spec,

404 const url_parse::Component& port,	407 const url_parse::Component& port,

405 int default_port_for_scheme,	408 int default_port_for_scheme,

406 CanonOutput* output,	409 CanonOutput* output,

407 url_parse::Component* out_port);	410 url_parse::Component* out_port);

408 bool CanonicalizePort(const char16* spec,	411 URL_EXPORT bool CanonicalizePort(const char16* spec,

409 const url_parse::Component& port,	412 const url_parse::Component& port,

410 int default_port_for_scheme,	413 int default_port_for_scheme,

411 CanonOutput* output,	414 CanonOutput* output,

412 url_parse::Component* out_port);	415 url_parse::Component* out_port);

413	416

414 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED	417 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED

415 // if the scheme is unknown.	418 // if the scheme is unknown.

416 int DefaultPortForScheme(const char* scheme, int scheme_len);	419 URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);

417	420

418 // Path. If the input does not begin in a slash (including if the input is	421 // Path. If the input does not begin in a slash (including if the input is

419 // empty), we'll prepend a slash to the path to make it canonical.	422 // empty), we'll prepend a slash to the path to make it canonical.

420 //	423 //

421 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity	424 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity

422 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid	425 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid

423 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't	426 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't

424 // an issue. Somebody giving us an 8-bit path is responsible for generating	427 // an issue. Somebody giving us an 8-bit path is responsible for generating

425 // the path that the server expects (we'll escape high-bit characters), so	428 // the path that the server expects (we'll escape high-bit characters), so

426 // if something is invalid, it's their problem.	429 // if something is invalid, it's their problem.

427 bool CanonicalizePath(const char* spec,	430 URL_EXPORT bool CanonicalizePath(const char* spec,

428 const url_parse::Component& path,	431 const url_parse::Component& path,

429 CanonOutput* output,	432 CanonOutput* output,

430 url_parse::Component* out_path);	433 url_parse::Component* out_path);

431 bool CanonicalizePath(const char16* spec,	434 URL_EXPORT bool CanonicalizePath(const char16* spec,

432 const url_parse::Component& path,	435 const url_parse::Component& path,

433 CanonOutput* output,	436 CanonOutput* output,

434 url_parse::Component* out_path);	437 url_parse::Component* out_path);

435	438

436 // Canonicalizes the input as a file path. This is like CanonicalizePath except	439 // Canonicalizes the input as a file path. This is like CanonicalizePath except

437 // that it also handles Windows drive specs. For example, the path can begin	440 // that it also handles Windows drive specs. For example, the path can begin

438 // with "c\|\" and it will get properly canonicalized to "C:/".	441 // with "c\|\" and it will get properly canonicalized to "C:/".

439 // The string will be appended to \|output\| and \|out_path\| will be updated.	442 // The string will be appended to \|output\| and \|out_path\| will be updated.

440 //	443 //

441 // The 8-bit version requires UTF-8 encoding.	444 // The 8-bit version requires UTF-8 encoding.

442 bool FileCanonicalizePath(const char* spec,	445 URL_EXPORT bool FileCanonicalizePath(const char* spec,

443 const url_parse::Component& path,	446 const url_parse::Component& path,

444 CanonOutput* output,	447 CanonOutput* output,

445 url_parse::Component* out_path);	448 url_parse::Component* out_path);

446 bool FileCanonicalizePath(const char16* spec,	449 URL_EXPORT bool FileCanonicalizePath(const char16* spec,

447 const url_parse::Component& path,	450 const url_parse::Component& path,

448 CanonOutput* output,	451 CanonOutput* output,

449 url_parse::Component* out_path);	452 url_parse::Component* out_path);

450	453

451 // Query: Prepends the ? if needed.	454 // Query: Prepends the ? if needed.

452 //	455 //

453 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly	456 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly

454 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode	457 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode

455 // "invalid character." This function can not fail, we always just try to do	458 // "invalid character." This function can not fail, we always just try to do

456 // our best for crazy input here since web pages can set it themselves.	459 // our best for crazy input here since web pages can set it themselves.

457 //	460 //

458 // This will convert the given input into the output encoding that the given	461 // This will convert the given input into the output encoding that the given

459 // character set converter object provides. The converter will only be called	462 // character set converter object provides. The converter will only be called

460 // if necessary; for ASCII input, no conversions are necessary.	463 // if necessary; for ASCII input, no conversions are necessary.

461 //	464 //

462 // The converter can be NULL. In this case, the output encoding will be UTF-8.	465 // The converter can be NULL. In this case, the output encoding will be UTF-8.

463 void CanonicalizeQuery(const char* spec,	466 URL_EXPORT void CanonicalizeQuery(const char* spec,

464 const url_parse::Component& query,	467 const url_parse::Component& query,

465 CharsetConverter* converter,	468 CharsetConverter* converter,

466 CanonOutput* output,	469 CanonOutput* output,

467 url_parse::Component* out_query);	470 url_parse::Component* out_query);

468 void CanonicalizeQuery(const char16* spec,	471 URL_EXPORT void CanonicalizeQuery(const char16* spec,

469 const url_parse::Component& query,	472 const url_parse::Component& query,

470 CharsetConverter* converter,	473 CharsetConverter* converter,

471 CanonOutput* output,	474 CanonOutput* output,

472 url_parse::Component* out_query);	475 url_parse::Component* out_query);

473	476

474 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only	477 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only

475 // canonicalizer that does not produce ASCII output). The output is	478 // canonicalizer that does not produce ASCII output). The output is

476 // guaranteed to be valid UTF-8.	479 // guaranteed to be valid UTF-8.

477 //	480 //

478 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use	481 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use

479 // the "Unicode replacement character" for the confusing bits and copy the rest.	482 // the "Unicode replacement character" for the confusing bits and copy the rest.

480 void CanonicalizeRef(const char* spec,	483 URL_EXPORT void CanonicalizeRef(const char* spec,

481 const url_parse::Component& path,	484 const url_parse::Component& path,

482 CanonOutput* output,	485 CanonOutput* output,

483 url_parse::Component* out_path);	486 url_parse::Component* out_path);

484 void CanonicalizeRef(const char16* spec,	487 URL_EXPORT void CanonicalizeRef(const char16* spec,

485 const url_parse::Component& path,	488 const url_parse::Component& path,

486 CanonOutput* output,	489 CanonOutput* output,

487 url_parse::Component* out_path);	490 url_parse::Component* out_path);

488	491

489 // Full canonicalizer ---------------------------------------------------------	492 // Full canonicalizer ---------------------------------------------------------

490 //	493 //

491 // These functions replace any string contents, rather than append as above.	494 // These functions replace any string contents, rather than append as above.

492 // See the above piece-by-piece functions for information specific to	495 // See the above piece-by-piece functions for information specific to

493 // canonicalizing individual components.	496 // canonicalizing individual components.

494 //	497 //

495 // The output will be ASCII except the reference fragment, which may be UTF-8.	498 // The output will be ASCII except the reference fragment, which may be UTF-8.

496 //	499 //

497 // The 8-bit versions require UTF-8 encoding.	500 // The 8-bit versions require UTF-8 encoding.

498	501

499 // Use for standard URLs with authorities and paths.	502 // Use for standard URLs with authorities and paths.

500 bool CanonicalizeStandardURL(const char* spec,	503 URL_EXPORT bool CanonicalizeStandardURL(const char* spec,

501 int spec_len,	504 int spec_len,

502 const url_parse::Parsed& parsed,	505 const url_parse::Parsed& parsed,

503 CharsetConverter* query_converter,	506 CharsetConverter* query_converter,

504 CanonOutput* output,	507 CanonOutput* output,

505 url_parse::Parsed* new_parsed);	508 url_parse::Parsed* new_parsed);

506 bool CanonicalizeStandardURL(const char16* spec,	509 URL_EXPORT bool CanonicalizeStandardURL(const char16* spec,

507 int spec_len,	510 int spec_len,

508 const url_parse::Parsed& parsed,	511 const url_parse::Parsed& parsed,

509 CharsetConverter* query_converter,	512 CharsetConverter* query_converter,

510 CanonOutput* output,	513 CanonOutput* output,

511 url_parse::Parsed* new_parsed);	514 url_parse::Parsed* new_parsed);

512	515

513 // Use for file URLs.	516 // Use for file URLs.

514 bool CanonicalizeFileURL(const char* spec,	517 URL_EXPORT bool CanonicalizeFileURL(const char* spec,

515 int spec_len,	518 int spec_len,

516 const url_parse::Parsed& parsed,	519 const url_parse::Parsed& parsed,

517 CharsetConverter* query_converter,	520 CharsetConverter* query_converter,

518 CanonOutput* output,	521 CanonOutput* output,

519 url_parse::Parsed* new_parsed);	522 url_parse::Parsed* new_parsed);

520 bool CanonicalizeFileURL(const char16* spec,	523 URL_EXPORT bool CanonicalizeFileURL(const char16* spec,

521 int spec_len,	524 int spec_len,

522 const url_parse::Parsed& parsed,	525 const url_parse::Parsed& parsed,

523 CharsetConverter* query_converter,	526 CharsetConverter* query_converter,

524 CanonOutput* output,	527 CanonOutput* output,

525 url_parse::Parsed* new_parsed);	528 url_parse::Parsed* new_parsed);

526	529

527 // Use for filesystem URLs.	530 // Use for filesystem URLs.

528 bool CanonicalizeFileSystemURL(const char* spec,	531 URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,

529 int spec_len,	532 int spec_len,

530 const url_parse::Parsed& parsed,	533 const url_parse::Parsed& parsed,

531 CharsetConverter* query_converter,	534 CharsetConverter* query_converter,

532 CanonOutput* output,	535 CanonOutput* output,

533 url_parse::Parsed* new_parsed);	536 url_parse::Parsed* new_parsed);

534 bool CanonicalizeFileSystemURL(const char16* spec,	537 URL_EXPORT bool CanonicalizeFileSystemURL(const char16* spec,

535 int spec_len,	538 int spec_len,

536 const url_parse::Parsed& parsed,	539 const url_parse::Parsed& parsed,

537 CharsetConverter* query_converter,	540 CharsetConverter* query_converter,

538 CanonOutput* output,	541 CanonOutput* output,

539 url_parse::Parsed* new_parsed);	542 url_parse::Parsed* new_parsed);

540	543

541 // Use for path URLs such as javascript. This does not modify the path in any	544 // Use for path URLs such as javascript. This does not modify the path in any

542 // way, for example, by escaping it.	545 // way, for example, by escaping it.

543 bool CanonicalizePathURL(const char* spec,	546 URL_EXPORT bool CanonicalizePathURL(const char* spec,

544 int spec_len,	547 int spec_len,

545 const url_parse::Parsed& parsed,	548 const url_parse::Parsed& parsed,

546 CanonOutput* output,	549 CanonOutput* output,

547 url_parse::Parsed* new_parsed);	550 url_parse::Parsed* new_parsed);

548 bool CanonicalizePathURL(const char16* spec,	551 URL_EXPORT bool CanonicalizePathURL(const char16* spec,

549 int spec_len,	552 int spec_len,

550 const url_parse::Parsed& parsed,	553 const url_parse::Parsed& parsed,

551 CanonOutput* output,	554 CanonOutput* output,

552 url_parse::Parsed* new_parsed);	555 url_parse::Parsed* new_parsed);

553	556

554 // Use for mailto URLs. This "canonicalizes" the url into a path and query	557 // Use for mailto URLs. This "canonicalizes" the url into a path and query

555 // component. It does not attempt to merge "to" fields. It uses UTF-8 for	558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for

556 // the query encoding if there is a query. This is because a mailto URL is	559 // the query encoding if there is a query. This is because a mailto URL is

557 // really intended for an external mail program, and the encoding of a page,	560 // really intended for an external mail program, and the encoding of a page,

558 // etc. which would influence a query encoding normally are irrelevant.	561 // etc. which would influence a query encoding normally are irrelevant.

559 bool CanonicalizeMailtoURL(const char* spec,	562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,

560 int spec_len,	563 int spec_len,

561 const url_parse::Parsed& parsed,	564 const url_parse::Parsed& parsed,

562 CanonOutput* output,	565 CanonOutput* output,

563 url_parse::Parsed* new_parsed);	566 url_parse::Parsed* new_parsed);

564 bool CanonicalizeMailtoURL(const char16* spec,	567 URL_EXPORT bool CanonicalizeMailtoURL(const char16* spec,

565 int spec_len,	568 int spec_len,

566 const url_parse::Parsed& parsed,	569 const url_parse::Parsed& parsed,

567 CanonOutput* output,	570 CanonOutput* output,

568 url_parse::Parsed* new_parsed);	571 url_parse::Parsed* new_parsed);

569	572

570 // Part replacer --------------------------------------------------------------	573 // Part replacer --------------------------------------------------------------

571	574

572 // Internal structure used for storing separate strings for each component.	575 // Internal structure used for storing separate strings for each component.

573 // The basic canonicalization functions use this structure internally so that	576 // The basic canonicalization functions use this structure internally so that

574 // component replacement (different strings for different components) can be	577 // component replacement (different strings for different components) can be

575 // treated on the same code path as regular canonicalization (the same string	578 // treated on the same code path as regular canonicalization (the same string

576 // for each component).	579 // for each component).

577 //	580 //

578 // A url_parse::Parsed structure usually goes along with this. Those	581 // A url_parse::Parsed structure usually goes along with this. Those

(...skipping 164 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
743 // Replace component \| (replacement string) (replacement component)	746 // Replace component \| (replacement string) (replacement component)

744 // Delete component \| (non-NULL) (invalid component: (0,-1))	747 // Delete component \| (non-NULL) (invalid component: (0,-1))

745 //	748 //

746 // We use a pointer to the empty string for the source when the component	749 // We use a pointer to the empty string for the source when the component

747 // should be deleted.	750 // should be deleted.

748 URLComponentSource<CHAR> sources_;	751 URLComponentSource<CHAR> sources_;

749 url_parse::Parsed components_;	752 url_parse::Parsed components_;

750 };	753 };

751	754

752 // The base must be an 8-bit canonical URL.	755 // The base must be an 8-bit canonical URL.

753 bool ReplaceStandardURL(const char* base,	756 URL_EXPORT bool ReplaceStandardURL(const char* base,

754 const url_parse::Parsed& base_parsed,	757 const url_parse::Parsed& base_parsed,

755 const Replacements<char>& replacements,	758 const Replacements<char>& replacements,

756 CharsetConverter* query_converter,	759 CharsetConverter* query_converter,

757 CanonOutput* output,	760 CanonOutput* output,

758 url_parse::Parsed* new_parsed);	761 url_parse::Parsed* new_parsed);

759 bool ReplaceStandardURL(const char* base,	762 URL_EXPORT bool ReplaceStandardURL(const char* base,

760 const url_parse::Parsed& base_parsed,	763 const url_parse::Parsed& base_parsed,

761 const Replacements<char16>& replacements,	764 const Replacements<char16>& replacements,

762 CharsetConverter* query_converter,	765 CharsetConverter* query_converter,

763 CanonOutput* output,	766 CanonOutput* output,

764 url_parse::Parsed* new_parsed);	767 url_parse::Parsed* new_parsed);

765	768

766 // Filesystem URLs can only have the path, query, or ref replaced.	769 // Filesystem URLs can only have the path, query, or ref replaced.

767 // All other components will be ignored.	770 // All other components will be ignored.

768 bool ReplaceFileSystemURL(const char* base,	771 URL_EXPORT bool ReplaceFileSystemURL(const char* base,

769 const url_parse::Parsed& base_parsed,	772 const url_parse::Parsed& base_parsed,

770 const Replacements<char>& replacements,	773 const Replacements<char>& replacements,

771 CharsetConverter* query_converter,	774 CharsetConverter* query_converter,

772 CanonOutput* output,	775 CanonOutput* output,

773 url_parse::Parsed* new_parsed);	776 url_parse::Parsed* new_parsed);

774 bool ReplaceFileSystemURL(const char* base,	777 URL_EXPORT bool ReplaceFileSystemURL(const char* base,

775 const url_parse::Parsed& base_parsed,	778 const url_parse::Parsed& base_parsed,

776 const Replacements<char16>& replacements,	779 const Replacements<char16>& replacements,

777 CharsetConverter* query_converter,	780 CharsetConverter* query_converter,

778 CanonOutput* output,	781 CanonOutput* output,

779 url_parse::Parsed* new_parsed);	782 url_parse::Parsed* new_parsed);

780	783

781 // Replacing some parts of a file URL is not permitted. Everything except	784 // Replacing some parts of a file URL is not permitted. Everything except

782 // the host, path, query, and ref will be ignored.	785 // the host, path, query, and ref will be ignored.

783 bool ReplaceFileURL(const char* base,	786 URL_EXPORT bool ReplaceFileURL(const char* base,

784 const url_parse::Parsed& base_parsed,	787 const url_parse::Parsed& base_parsed,

785 const Replacements<char>& replacements,	788 const Replacements<char>& replacements,

786 CharsetConverter* query_converter,	789 CharsetConverter* query_converter,

787 CanonOutput* output,	790 CanonOutput* output,

788 url_parse::Parsed* new_parsed);	791 url_parse::Parsed* new_parsed);

789 bool ReplaceFileURL(const char* base,	792 URL_EXPORT bool ReplaceFileURL(const char* base,

790 const url_parse::Parsed& base_parsed,	793 const url_parse::Parsed& base_parsed,

791 const Replacements<char16>& replacements,	794 const Replacements<char16>& replacements,

792 CharsetConverter* query_converter,	795 CharsetConverter* query_converter,

793 CanonOutput* output,	796 CanonOutput* output,

794 url_parse::Parsed* new_parsed);	797 url_parse::Parsed* new_parsed);

795	798

796 // Path URLs can only have the scheme and path replaced. All other components	799 // Path URLs can only have the scheme and path replaced. All other components

797 // will be ignored.	800 // will be ignored.

798 bool ReplacePathURL(const char* base,	801 URL_EXPORT bool ReplacePathURL(const char* base,

799 const url_parse::Parsed& base_parsed,	802 const url_parse::Parsed& base_parsed,

800 const Replacements<char>& replacements,	803 const Replacements<char>& replacements,

801 CanonOutput* output,	804 CanonOutput* output,

802 url_parse::Parsed* new_parsed);	805 url_parse::Parsed* new_parsed);

803 bool ReplacePathURL(const char* base,	806 URL_EXPORT bool ReplacePathURL(const char* base,

804 const url_parse::Parsed& base_parsed,	807 const url_parse::Parsed& base_parsed,

805 const Replacements<char16>& replacements,	808 const Replacements<char16>& replacements,

806 CanonOutput* output,	809 CanonOutput* output,

807 url_parse::Parsed* new_parsed);	810 url_parse::Parsed* new_parsed);

808	811

809 // Mailto URLs can only have the scheme, path, and query replaced.	812 // Mailto URLs can only have the scheme, path, and query replaced.

810 // All other components will be ignored.	813 // All other components will be ignored.

811 bool ReplaceMailtoURL(const char* base,	814 URL_EXPORT bool ReplaceMailtoURL(const char* base,

812 const url_parse::Parsed& base_parsed,	815 const url_parse::Parsed& base_parsed,

813 const Replacements<char>& replacements,	816 const Replacements<char>& replacements,

814 CanonOutput* output,	817 CanonOutput* output,

815 url_parse::Parsed* new_parsed);	818 url_parse::Parsed* new_parsed);

816 bool ReplaceMailtoURL(const char* base,	819 URL_EXPORT bool ReplaceMailtoURL(const char* base,

817 const url_parse::Parsed& base_parsed,	820 const url_parse::Parsed& base_parsed,

818 const Replacements<char16>& replacements,	821 const Replacements<char16>& replacements,

819 CanonOutput* output,	822 CanonOutput* output,

820 url_parse::Parsed* new_parsed);	823 url_parse::Parsed* new_parsed);

821	824

822 // Relative URL ---------------------------------------------------------------	825 // Relative URL ---------------------------------------------------------------

823	826

824 // Given an input URL or URL fragment \|fragment\|, determines if it is a	827 // Given an input URL or URL fragment \|fragment\|, determines if it is a

825 // relative or absolute URL and places the result into \|*is_relative\|. If it is	828 // relative or absolute URL and places the result into \|*is_relative\|. If it is

826 // relative, the relevant portion of the URL will be placed into	829 // relative, the relevant portion of the URL will be placed into

827 // \|*relative_component\| (there may have been trimmed whitespace, for example).	830 // \|*relative_component\| (there may have been trimmed whitespace, for example).

828 // This value is passed to ResolveRelativeURL. If the input is not relative,	831 // This value is passed to ResolveRelativeURL. If the input is not relative,

829 // this value is UNDEFINED (it may be changed by the function).	832 // this value is UNDEFINED (it may be changed by the function).

830 //	833 //

831 // Returns true on success (we successfully determined the URL is relative or	834 // Returns true on success (we successfully determined the URL is relative or

832 // not). Failure means that the combination of URLs doesn't make any sense.	835 // not). Failure means that the combination of URLs doesn't make any sense.

833 //	836 //

834 // The base URL should always be canonical, and is therefore ASCII.	837 // The base URL should always be canonical, and is therefore ASCII.

835 bool IsRelativeURL(const char* base,	838 URL_EXPORT bool IsRelativeURL(const char* base,

836 const url_parse::Parsed& base_parsed,	839 const url_parse::Parsed& base_parsed,

837 const char* fragment,	840 const char* fragment,

838 int fragment_len,	841 int fragment_len,

839 bool is_base_hierarchical,	842 bool is_base_hierarchical,

840 bool* is_relative,	843 bool* is_relative,

841 url_parse::Component* relative_component);	844 url_parse::Component* relative_component);

842 bool IsRelativeURL(const char* base,	845 URL_EXPORT bool IsRelativeURL(const char* base,

843 const url_parse::Parsed& base_parsed,	846 const url_parse::Parsed& base_parsed,

844 const char16* fragment,	847 const char16* fragment,

845 int fragment_len,	848 int fragment_len,

846 bool is_base_hierarchical,	849 bool is_base_hierarchical,

847 bool* is_relative,	850 bool* is_relative,

848 url_parse::Component* relative_component);	851 url_parse::Component* relative_component);

849	852

850 // Given a canonical parsed source URL, a URL fragment known to be relative,	853 // Given a canonical parsed source URL, a URL fragment known to be relative,

851 // and the identified relevant portion of the relative URL (computed by	854 // and the identified relevant portion of the relative URL (computed by

852 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and	855 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and

853 // \|out_parsed\|.	856 // \|out_parsed\|.

854 //	857 //

855 // It also requires a flag indicating whether the base URL is a file: URL	858 // It also requires a flag indicating whether the base URL is a file: URL

856 // which triggers additional logic.	859 // which triggers additional logic.

857 //	860 //

858 // The base URL should be canonical and have a host (may be empty for file	861 // The base URL should be canonical and have a host (may be empty for file

859 // URLs) and a path. If it doesn't have these, we can't resolve relative	862 // URLs) and a path. If it doesn't have these, we can't resolve relative

860 // URLs off of it and will return the base as the output with an error flag.	863 // URLs off of it and will return the base as the output with an error flag.

861 // Because it is canonical, it should also be ASCII.	864 // Because it is canonical, it should also be ASCII.

862 //	865 //

863 // The query charset converter follows the same rules as CanonicalizeQuery.	866 // The query charset converter follows the same rules as CanonicalizeQuery.

864 //	867 //

865 // Returns true on success. On failure, the output will be "something	868 // Returns true on success. On failure, the output will be "something

866 // reasonable" that will be consistent and valid, just probably not what	869 // reasonable" that will be consistent and valid, just probably not what

867 // was intended by the web page author or caller.	870 // was intended by the web page author or caller.

868 bool ResolveRelativeURL(const char* base_url,	871 URL_EXPORT bool ResolveRelativeURL(

869 const url_parse::Parsed& base_parsed,	872 const char* base_url,

870 bool base_is_file,	873 const url_parse::Parsed& base_parsed,

871 const char* relative_url,	874 bool base_is_file,

872 const url_parse::Component& relative_component,	875 const char* relative_url,

873 CharsetConverter* query_converter,	876 const url_parse::Component& relative_component,

874 CanonOutput* output,	877 CharsetConverter* query_converter,

875 url_parse::Parsed* out_parsed);	878 CanonOutput* output,

876 bool ResolveRelativeURL(const char* base_url,	879 url_parse::Parsed* out_parsed);

877 const url_parse::Parsed& base_parsed,	880 URL_EXPORT bool ResolveRelativeURL(

878 bool base_is_file,	881 const char* base_url,

879 const char16* relative_url,	882 const url_parse::Parsed& base_parsed,

880 const url_parse::Component& relative_component,	883 bool base_is_file,

881 CharsetConverter* query_converter,	884 const char16* relative_url,

882 CanonOutput* output,	885 const url_parse::Component& relative_component,

883 url_parse::Parsed* out_parsed);	886 CharsetConverter* query_converter,

	887 CanonOutput* output,

	888 url_parse::Parsed* out_parsed);

884	889

885 } // namespace url_canon	890 } // namespace url_canon

886	891

887 #endif // URL_URL_CANON_H_	892 #endif // URL_URL_CANON_H_

OLD	NEW

« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »