src/url_canon.h - Issue 2939004: Add option to build googleurl as dll...

Side by Side Diff: src/url_canon.h

Issue 2939004: Add option to build googleurl as dll... (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/

Patch Set: '' Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2007, Google Inc.	1 // Copyright 2007, Google Inc.

2 // All rights reserved.	2 // All rights reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions are	5 // modification, are permitted provided that the following conditions are

6 // met:	6 // met:

7 //	7 //

8 // * Redistributions of source code must retain the above copyright	8 // * Redistributions of source code must retain the above copyright

9 // notice, this list of conditions and the following disclaimer.	9 // notice, this list of conditions and the following disclaimer.

10 // * Redistributions in binary form must reproduce the above	10 // * Redistributions in binary form must reproduce the above

(...skipping 15 matching lines...) Expand all Loading...
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

29 #ifndef GOOGLEURL_SRC_URL_CANON_H__	29 #ifndef GOOGLEURL_SRC_URL_CANON_H__

30 #define GOOGLEURL_SRC_URL_CANON_H__	30 #define GOOGLEURL_SRC_URL_CANON_H__

31	31

32 #include <memory.h>	32 #include <memory.h>

33 #include <stdlib.h>	33 #include <stdlib.h>

34	34

35 #include "base/string16.h"	35 #include "base/string16.h"

	36 #include "googleurl/src/url_common.h"

36 #include "googleurl/src/url_parse.h"	37 #include "googleurl/src/url_parse.h"

37	38

38 namespace url_canon {	39 namespace url_canon {

39	40

40 // Canonicalizer output -------------------------------------------------------	41 // Canonicalizer output -------------------------------------------------------

41	42

42 // Base class for the canonicalizer output, this maintains a buffer and	43 // Base class for the canonicalizer output, this maintains a buffer and

43 // supports simple resizing and append operations on it.	44 // supports simple resizing and append operations on it.

44 //	45 //

45 // It is VERY IMPORTANT that no virtual function calls be made on the common	46 // It is VERY IMPORTANT that no virtual function calls be made on the common

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
241 // This should be called before parsing if whitespace removal is desired (which	242 // This should be called before parsing if whitespace removal is desired (which

242 // it normally is when you are canonicalizing).	243 // it normally is when you are canonicalizing).

243 //	244 //

244 // If no whitespace is removed, this function will not use the buffer and will	245 // If no whitespace is removed, this function will not use the buffer and will

245 // return a pointer to the input, to avoid the extra copy. If modification is	246 // return a pointer to the input, to avoid the extra copy. If modification is

246 // required, the given |buffer| will be used and the returned pointer will	247 // required, the given |buffer| will be used and the returned pointer will

247 // point to the beginning of the buffer.	248 // point to the beginning of the buffer.

248 //	249 //

249 // Therefore, callers should not use the buffer, since it may actually be empty,	250 // Therefore, callers should not use the buffer, since it may actually be empty,

250 // use the computed pointer and |*output_len| instead.	251 // use the computed pointer and |*output_len| instead.

251 const char* RemoveURLWhitespace(const char* input, int input_len,	252 GURL_API const char* RemoveURLWhitespace(const char* input, int input_len,

252 CanonOutputT<char>* buffer,	253 CanonOutputT<char>* buffer,

253 int* output_len);	254 int* output_len);

254 const char16* RemoveURLWhitespace(const char16* input, int input_len,	255 GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len,

255 CanonOutputT<char16>* buffer,	256 CanonOutputT<char16>* buffer,

256 int* output_len);	257 int* output_len);

257	258

258 // IDN ------------------------------------------------------------------------	259 // IDN ------------------------------------------------------------------------

259	260

260 // Converts the Unicode input representing a hostname to ASCII using IDN rules.	261 // Converts the Unicode input representing a hostname to ASCII using IDN rules.

261 // The output must fall in the ASCII range, but will be encoded in UTF-16.	262 // The output must fall in the ASCII range, but will be encoded in UTF-16.

262 //	263 //

263 // On success, the output will be filled with the ASCII host name and it will	264 // On success, the output will be filled with the ASCII host name and it will

264 // return true. Unlike most other canonicalization functions, this assumes that	265 // return true. Unlike most other canonicalization functions, this assumes that

265 // the output is empty. The beginning of the host will be at offset 0, and	266 // the output is empty. The beginning of the host will be at offset 0, and

266 // the length of the output will be set to the length of the new host name.	267 // the length of the output will be set to the length of the new host name.

267 //	268 //

268 // On error, returns false. The output in this case is undefined.	269 // On error, returns false. The output in this case is undefined.

269 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);	270 GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);

270	271

271 // Piece-by-piece canonicalizers ----------------------------------------------	272 // Piece-by-piece canonicalizers ----------------------------------------------

272 //	273 //

273 // These individual canonicalizers append the canonicalized versions of the	274 // These individual canonicalizers append the canonicalized versions of the

274 // corresponding URL component to the given std::string. The spec and the	275 // corresponding URL component to the given std::string. The spec and the

275 // previously-identified range of that component are the input. The range of	276 // previously-identified range of that component are the input. The range of

276 // the canonicalized component will be written to the output component.	277 // the canonicalized component will be written to the output component.

277 //	278 //

278 // These functions all append to the output so they can be chained. Make sure	279 // These functions all append to the output so they can be chained. Make sure

279 // the output is empty when you start.	280 // the output is empty when you start.

280 //	281 //

281 // These functions return boolean values indicating success. On failure, they	282 // These functions return boolean values indicating success. On failure, they

282 // will attempt to write something reasonable to the output so that, if	283 // will attempt to write something reasonable to the output so that, if

283 // displayed to the user, they will recognise it as something that's messed up.	284 // displayed to the user, they will recognise it as something that's messed up.

284 // Nothing more should ever be done with these invalid URLs, however.	285 // Nothing more should ever be done with these invalid URLs, however.

285	286

286 // Scheme: Appends the scheme and colon to the URL. The output component will	287 // Scheme: Appends the scheme and colon to the URL. The output component will

287 // indicate the range of characters up to but not including the colon.	288 // indicate the range of characters up to but not including the colon.

288 //	289 //

289 // Canonical URLs always have a scheme. If the scheme is not present in the	290 // Canonical URLs always have a scheme. If the scheme is not present in the

290 // input, this will just write the colon to indicate an empty scheme. Does not	291 // input, this will just write the colon to indicate an empty scheme. Does not

291 // append slashes which will be needed before any authority components for most	292 // append slashes which will be needed before any authority components for most

292 // URLs.	293 // URLs.

293 //	294 //

294 // The 8-bit version requires UTF-8 encoding.	295 // The 8-bit version requires UTF-8 encoding.

295 bool CanonicalizeScheme(const char* spec,	296 GURL_API bool CanonicalizeScheme(const char* spec,

296 const url_parse::Component& scheme,	297 const url_parse::Component& scheme,

297 CanonOutput* output,	298 CanonOutput* output,

298 url_parse::Component* out_scheme);	299 url_parse::Component* out_scheme);

299 bool CanonicalizeScheme(const char16* spec,	300 GURL_API bool CanonicalizeScheme(const char16* spec,

300 const url_parse::Component& scheme,	301 const url_parse::Component& scheme,

301 CanonOutput* output,	302 CanonOutput* output,

302 url_parse::Component* out_scheme);	303 url_parse::Component* out_scheme);

303	304

304 // User info: username/password. If present, this will add the delimiters so	305 // User info: username/password. If present, this will add the delimiters so

305 // the output will be "<username>:<password>@" or "<username>@". Empty	306 // the output will be "<username>:<password>@" or "<username>@". Empty

306 // username/password pairs, or empty passwords, will get converted to	307 // username/password pairs, or empty passwords, will get converted to

307 // nonexistent in the canonical version.	308 // nonexistent in the canonical version.

308 //	309 //

309 // The components for the username and password refer to ranges in the	310 // The components for the username and password refer to ranges in the

310 // respective source strings. Usually, these will be the same string, which	311 // respective source strings. Usually, these will be the same string, which

311 // is legal as long as the two components don't overlap.	312 // is legal as long as the two components don't overlap.

312 //	313 //

313 // The 8-bit version requires UTF-8 encoding.	314 // The 8-bit version requires UTF-8 encoding.

314 bool CanonicalizeUserInfo(const char* username_source,	315 GURL_API bool CanonicalizeUserInfo(const char* username_source,

315 const url_parse::Component& username,	316 const url_parse::Component& username,

316 const char* password_source,	317 const char* password_source,

317 const url_parse::Component& password,	318 const url_parse::Component& password,

318 CanonOutput* output,	319 CanonOutput* output,

319 url_parse::Component* out_username,	320 url_parse::Component* out_username,

320 url_parse::Component* out_password);	321 url_parse::Component* out_password);

321 bool CanonicalizeUserInfo(const char16* username_source,	322 GURL_API bool CanonicalizeUserInfo(const char16* username_source,

322 const url_parse::Component& username,	323 const url_parse::Component& username,

323 const char16* password_source,	324 const char16* password_source,

324 const url_parse::Component& password,	325 const url_parse::Component& password,

325 CanonOutput* output,	326 CanonOutput* output,

326 url_parse::Component* out_username,	327 url_parse::Component* out_username,

327 url_parse::Component* out_password);	328 url_parse::Component* out_password);

328	329

329	330

330 // This structure holds detailed state exported from the IP/Host canonicalizers.	331 // This structure holds detailed state exported from the IP/Host canonicalizers.

331 // Additional fields may be added as callers require them.	332 // Additional fields may be added as callers require them.

332 struct CanonHostInfo {	333 struct CanonHostInfo {

333 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}	334 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}

334	335

335 // Convenience function to test if family is an IP address.	336 // Convenience function to test if family is an IP address.

336 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }	337 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }

337	338

(...skipping 21 matching lines...) Expand all Loading...
359 // CanonicalizeIPAddress() only sets this field if \|family\| is IPV4 or IPV6.	360 // CanonicalizeIPAddress() only sets this field if \|family\| is IPV4 or IPV6.

360 // CanonicalizeHostVerbose() always sets it.	361 // CanonicalizeHostVerbose() always sets it.

361 url_parse::Component out_host;	362 url_parse::Component out_host;

362 };	363 };

363	364

364	365

365 // Host.	366 // Host.

366 //	367 //

367 // The 8-bit version requires UTF-8 encoding. Use this version when you only	368 // The 8-bit version requires UTF-8 encoding. Use this version when you only

368 // need to know whether canonicalization succeeded.	369 // need to know whether canonicalization succeeded.

369 bool CanonicalizeHost(const char* spec,	370 GURL_API bool CanonicalizeHost(const char* spec,

370 const url_parse::Component& host,	371 const url_parse::Component& host,

371 CanonOutput* output,	372 CanonOutput* output,

372 url_parse::Component* out_host);	373 url_parse::Component* out_host);

373 bool CanonicalizeHost(const char16* spec,	374 GURL_API bool CanonicalizeHost(const char16* spec,

374 const url_parse::Component& host,	375 const url_parse::Component& host,

375 CanonOutput* output,	376 CanonOutput* output,

376 url_parse::Component* out_host);	377 url_parse::Component* out_host);

377	378

378 // Extended version of CanonicalizeHost, which returns additional information.	379 // Extended version of CanonicalizeHost, which returns additional information.

379 // Use this when you need to know whether the hostname was an IP address.	380 // Use this when you need to know whether the hostname was an IP address.

380 // A successful return is indicated by host_info->family != BROKEN. See the	381 // A successful return is indicated by host_info->family != BROKEN. See the

381 // definition of CanonHostInfo above for details.	382 // definition of CanonHostInfo above for details.

382 void CanonicalizeHostVerbose(const char* spec,	383 GURL_API void CanonicalizeHostVerbose(const char* spec,

383 const url_parse::Component& host,	384 const url_parse::Component& host,

384 CanonOutput* output,	385 CanonOutput* output,

385 CanonHostInfo* host_info);	386 CanonHostInfo* host_info);

386 void CanonicalizeHostVerbose(const char16* spec,	387 GURL_API void CanonicalizeHostVerbose(const char16* spec,

387 const url_parse::Component& host,	388 const url_parse::Component& host,

388 CanonOutput* output,	389 CanonOutput* output,

389 CanonHostInfo* host_info);	390 CanonHostInfo* host_info);

390	391

391	392

392 // IP addresses.	393 // IP addresses.

393 //	394 //

394 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is	395 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is

395 // an IP address, it will canonicalize it as such, appending it to \|output\|.	396 // an IP address, it will canonicalize it as such, appending it to \|output\|.

396 // Additional status information is returned via the \|*host_info\| parameter.	397 // Additional status information is returned via the \|*host_info\| parameter.

397 // See the definition of CanonHostInfo above for details.	398 // See the definition of CanonHostInfo above for details.

398 //	399 //

399 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that	400 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that

400 // the input is unescaped and name-prepped, etc. It should not normally be	401 // the input is unescaped and name-prepped, etc. It should not normally be

401 // necessary or wise to call this directly.	402 // necessary or wise to call this directly.

402 void CanonicalizeIPAddress(const char* spec,	403 GURL_API void CanonicalizeIPAddress(const char* spec,

403 const url_parse::Component& host,	404 const url_parse::Component& host,

404 CanonOutput* output,	405 CanonOutput* output,

405 CanonHostInfo* host_info);	406 CanonHostInfo* host_info);

406 void CanonicalizeIPAddress(const char16* spec,	407 GURL_API void CanonicalizeIPAddress(const char16* spec,

407 const url_parse::Component& host,	408 const url_parse::Component& host,

408 CanonOutput* output,	409 CanonOutput* output,

409 CanonHostInfo* host_info);	410 CanonHostInfo* host_info);

410	411

411 // Port: this function will add the colon for the port if a port is present.	412 // Port: this function will add the colon for the port if a port is present.

412 // The caller can pass url_parse::PORT_UNSPECIFIED as the	413 // The caller can pass url_parse::PORT_UNSPECIFIED as the

413 // default_port_for_scheme argument if there is no default port.	414 // default_port_for_scheme argument if there is no default port.

414 //	415 //

415 // The 8-bit version requires UTF-8 encoding.	416 // The 8-bit version requires UTF-8 encoding.

416 bool CanonicalizePort(const char* spec,	417 GURL_API bool CanonicalizePort(const char* spec,

417 const url_parse::Component& port,	418 const url_parse::Component& port,

418 int default_port_for_scheme,	419 int default_port_for_scheme,

419 CanonOutput* output,	420 CanonOutput* output,

420 url_parse::Component* out_port);	421 url_parse::Component* out_port);

421 bool CanonicalizePort(const char16* spec,	422 GURL_API bool CanonicalizePort(const char16* spec,

422 const url_parse::Component& port,	423 const url_parse::Component& port,

423 int default_port_for_scheme,	424 int default_port_for_scheme,

424 CanonOutput* output,	425 CanonOutput* output,

425 url_parse::Component* out_port);	426 url_parse::Component* out_port);

426	427

427 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED	428 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED

428 // if the scheme is unknown.	429 // if the scheme is unknown.

429 int DefaultPortForScheme(const char* scheme, int scheme_len);	430 GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len);

430	431

431 // Path. If the input does not begin in a slash (including if the input is	432 // Path. If the input does not begin in a slash (including if the input is

432 // empty), we'll prepend a slash to the path to make it canonical.	433 // empty), we'll prepend a slash to the path to make it canonical.

433 //	434 //

434 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity	435 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity

435 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid	436 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid

436 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't	437 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't

437 // an issue. Somebody giving us an 8-bit path is responsible for generating	438 // an issue. Somebody giving us an 8-bit path is responsible for generating

438 // the path that the server expects (we'll escape high-bit characters), so	439 // the path that the server expects (we'll escape high-bit characters), so

439 // if something is invalid, it's their problem.	440 // if something is invalid, it's their problem.

440 bool CanonicalizePath(const char* spec,	441 GURL_API bool CanonicalizePath(const char* spec,

441 const url_parse::Component& path,	442 const url_parse::Component& path,

442 CanonOutput* output,	443 CanonOutput* output,

443 url_parse::Component* out_path);	444 url_parse::Component* out_path);

444 bool CanonicalizePath(const char16* spec,	445 GURL_API bool CanonicalizePath(const char16* spec,

445 const url_parse::Component& path,	446 const url_parse::Component& path,

446 CanonOutput* output,	447 CanonOutput* output,

447 url_parse::Component* out_path);	448 url_parse::Component* out_path);

448	449

449 // Canonicalizes the input as a file path. This is like CanonicalizePath except	450 // Canonicalizes the input as a file path. This is like CanonicalizePath except

450 // that it also handles Windows drive specs. For example, the path can begin	451 // that it also handles Windows drive specs. For example, the path can begin

451 // with "c|\" and it will get properly canonicalized to "C:/".	452 // with "c|\" and it will get properly canonicalized to "C:/".

452 // The string will be appended to \|output\| and \|out_path\| will be updated.	453 // The string will be appended to \|output\| and \|out_path\| will be updated.

453 //	454 //

454 // The 8-bit version requires UTF-8 encoding.	455 // The 8-bit version requires UTF-8 encoding.

455 bool FileCanonicalizePath(const char* spec,	456 GURL_API bool FileCanonicalizePath(const char* spec,

456 const url_parse::Component& path,	457 const url_parse::Component& path,

457 CanonOutput* output,	458 CanonOutput* output,

458 url_parse::Component* out_path);	459 url_parse::Component* out_path);

459 bool FileCanonicalizePath(const char16* spec,	460 GURL_API bool FileCanonicalizePath(const char16* spec,

460 const url_parse::Component& path,	461 const url_parse::Component& path,

461 CanonOutput* output,	462 CanonOutput* output,

462 url_parse::Component* out_path);	463 url_parse::Component* out_path);

463	464

464 // Query: Prepends the ? if needed.	465 // Query: Prepends the ? if needed.

465 //	466 //

466 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly	467 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly

467 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode	468 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode

468 // "invalid character." This function can not fail, we always just try to do	469 // "invalid character." This function can not fail, we always just try to do

469 // our best for crazy input here since web pages can set it themselves.	470 // our best for crazy input here since web pages can set it themselves.

470 //	471 //

471 // This will convert the given input into the output encoding that the given	472 // This will convert the given input into the output encoding that the given

472 // character set converter object provides. The converter will only be called	473 // character set converter object provides. The converter will only be called

473 // if necessary, for ASCII input, no conversions are necessary.	474 // if necessary, for ASCII input, no conversions are necessary.

474 //	475 //

475 // The converter can be NULL. In this case, the output encoding will be UTF-8.	476 // The converter can be NULL. In this case, the output encoding will be UTF-8.

476 void CanonicalizeQuery(const char* spec,	477 GURL_API void CanonicalizeQuery(const char* spec,

477 const url_parse::Component& query,	478 const url_parse::Component& query,

478 CharsetConverter* converter,	479 CharsetConverter* converter,

479 CanonOutput* output,	480 CanonOutput* output,

480 url_parse::Component* out_query);	481 url_parse::Component* out_query);

481 void CanonicalizeQuery(const char16* spec,	482 GURL_API void CanonicalizeQuery(const char16* spec,

482 const url_parse::Component& query,	483 const url_parse::Component& query,

483 CharsetConverter* converter,	484 CharsetConverter* converter,

484 CanonOutput* output,	485 CanonOutput* output,

485 url_parse::Component* out_query);	486 url_parse::Component* out_query);

486	487

487 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only	488 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only

488 // canonicalizer that does not produce ASCII output). The output is	489 // canonicalizer that does not produce ASCII output). The output is

489 // guaranteed to be valid UTF-8.	490 // guaranteed to be valid UTF-8.

490 //	491 //

491 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use	492 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use

492 // the "Unicode replacement character" for the confusing bits and copy the rest.	493 // the "Unicode replacement character" for the confusing bits and copy the rest.

493 void CanonicalizeRef(const char* spec,	494 GURL_API void CanonicalizeRef(const char* spec,

494 const url_parse::Component& path,	495 const url_parse::Component& path,

495 CanonOutput* output,	496 CanonOutput* output,

496 url_parse::Component* out_path);	497 url_parse::Component* out_path);

497 void CanonicalizeRef(const char16* spec,	498 GURL_API void CanonicalizeRef(const char16* spec,

498 const url_parse::Component& path,	499 const url_parse::Component& path,

499 CanonOutput* output,	500 CanonOutput* output,

500 url_parse::Component* out_path);	501 url_parse::Component* out_path);

501	502

502 // Full canonicalizer ---------------------------------------------------------	503 // Full canonicalizer ---------------------------------------------------------

503 //	504 //

504 // These functions replace any string contents, rather than append as above.	505 // These functions replace any string contents, rather than append as above.

505 // See the above piece-by-piece functions for information specific to	506 // See the above piece-by-piece functions for information specific to

506 // canonicalizing individual components.	507 // canonicalizing individual components.

507 //	508 //

508 // The output will be ASCII except the reference fragment, which may be UTF-8.	509 // The output will be ASCII except the reference fragment, which may be UTF-8.

509 //	510 //

510 // The 8-bit versions require UTF-8 encoding.	511 // The 8-bit versions require UTF-8 encoding.

511	512

512 // Use for standard URLs with authorities and paths.	513 // Use for standard URLs with authorities and paths.

513 bool CanonicalizeStandardURL(const char* spec,	514 GURL_API bool CanonicalizeStandardURL(const char* spec,

514 int spec_len,	515 int spec_len,

515 const url_parse::Parsed& parsed,	516 const url_parse::Parsed& parsed,

516 CharsetConverter* query_converter,	517 CharsetConverter* query_converter,

517 CanonOutput* output,	518 CanonOutput* output,

518 url_parse::Parsed* new_parsed);	519 url_parse::Parsed* new_parsed);

519 bool CanonicalizeStandardURL(const char16* spec,	520 GURL_API bool CanonicalizeStandardURL(const char16* spec,

520 int spec_len,	521 int spec_len,

521 const url_parse::Parsed& parsed,	522 const url_parse::Parsed& parsed,

522 CharsetConverter* query_converter,	523 CharsetConverter* query_converter,

523 CanonOutput* output,	524 CanonOutput* output,

524 url_parse::Parsed* new_parsed);	525 url_parse::Parsed* new_parsed);

525	526

526 // Use for file URLs.	527 // Use for file URLs.

527 bool CanonicalizeFileURL(const char* spec,	528 GURL_API bool CanonicalizeFileURL(const char* spec,

528 int spec_len,	529 int spec_len,

529 const url_parse::Parsed& parsed,	530 const url_parse::Parsed& parsed,

530 CharsetConverter* query_converter,	531 CharsetConverter* query_converter,

531 CanonOutput* output,	532 CanonOutput* output,

532 url_parse::Parsed* new_parsed);	533 url_parse::Parsed* new_parsed);

533 bool CanonicalizeFileURL(const char16* spec,	534 GURL_API bool CanonicalizeFileURL(const char16* spec,

534 int spec_len,	535 int spec_len,

535 const url_parse::Parsed& parsed,	536 const url_parse::Parsed& parsed,

536 CharsetConverter* query_converter,	537 CharsetConverter* query_converter,

537 CanonOutput* output,	538 CanonOutput* output,

538 url_parse::Parsed* new_parsed);	539 url_parse::Parsed* new_parsed);

539	540

540 // Use for path URLs such as javascript. This does not modify the path in any	541 // Use for path URLs such as javascript. This does not modify the path in any

541 // way, for example, by escaping it.	542 // way, for example, by escaping it.

542 bool CanonicalizePathURL(const char* spec,	543 GURL_API bool CanonicalizePathURL(const char* spec,

543 int spec_len,	544 int spec_len,

544 const url_parse::Parsed& parsed,	545 const url_parse::Parsed& parsed,

545 CanonOutput* output,	546 CanonOutput* output,

546 url_parse::Parsed* new_parsed);	547 url_parse::Parsed* new_parsed);

547 bool CanonicalizePathURL(const char16* spec,	548 GURL_API bool CanonicalizePathURL(const char16* spec,

548 int spec_len,	549 int spec_len,

549 const url_parse::Parsed& parsed,	550 const url_parse::Parsed& parsed,

550 CanonOutput* output,	551 CanonOutput* output,

551 url_parse::Parsed* new_parsed);	552 url_parse::Parsed* new_parsed);

552	553

553 // Use for mailto URLs. This "canonicalizes" the url into a path and query	554 // Use for mailto URLs. This "canonicalizes" the url into a path and query

554 // component. It does not attempt to merge "to" fields. It uses UTF-8 for	555 // component. It does not attempt to merge "to" fields. It uses UTF-8 for

555 // the query encoding if there is a query. This is because a mailto URL is	556 // the query encoding if there is a query. This is because a mailto URL is

556 // really intended for an external mail program, and the encoding of a page,	557 // really intended for an external mail program, and the encoding of a page,

557 // etc. which would influence a query encoding normally are irrelevant.	558 // etc. which would influence a query encoding normally are irrelevant.

558 bool CanonicalizeMailtoURL(const char* spec,	559 GURL_API bool CanonicalizeMailtoURL(const char* spec,

559 int spec_len,	560 int spec_len,

560 const url_parse::Parsed& parsed,	561 const url_parse::Parsed& parsed,

561 CanonOutput* output,	562 CanonOutput* output,

562 url_parse::Parsed* new_parsed);	563 url_parse::Parsed* new_parsed);

563 bool CanonicalizeMailtoURL(const char16* spec,	564 GURL_API bool CanonicalizeMailtoURL(const char16* spec,

564 int spec_len,	565 int spec_len,

565 const url_parse::Parsed& parsed,	566 const url_parse::Parsed& parsed,

566 CanonOutput* output,	567 CanonOutput* output,

567 url_parse::Parsed* new_parsed);	568 url_parse::Parsed* new_parsed);

568	569

569 // Part replacer --------------------------------------------------------------	570 // Part replacer --------------------------------------------------------------

570	571

571 // Internal structure used for storing separate strings for each component.	572 // Internal structure used for storing separate strings for each component.

572 // The basic canonicalization functions use this structure internally so that	573 // The basic canonicalization functions use this structure internally so that

573 // component replacement (different strings for different components) can be	574 // component replacement (different strings for different components) can be

574 // treated on the same code path as regular canonicalization (the same string	575 // treated on the same code path as regular canonicalization (the same string

575 // for each component).	576 // for each component).

576 //	577 //

577 // A url_parse::Parsed structure usually goes along with this. Those	578 // A url_parse::Parsed structure usually goes along with this. Those

578 // components identify offsets within these strings, so that they can all be	579 // components identify offsets within these strings, so that they can all be

579 // in the same string, or spread arbitrarily across different ones.	580 // in the same string, or spread arbitrarily across different ones.

580 //	581 //

581 // This structure does not own any data. It is the caller's responsibility to	582 // This structure does not own any data. It is the caller's responsibility to

582 // ensure that the data the pointers point to stays in scope and is not	583 // ensure that the data the pointers point to stays in scope and is not

583 // modified.	584 // modified.

584 template<typename CHAR>	585 template<typename CHAR>

585 struct URLComponentSource {	586 struct URLComponentSource {

586 // Constructor normally used by callers wishing to replace components. This	587 // Constructor normally used by callers wishing to replace components. This

587 // will make them all NULL, which is no replacement. The caller would then	588 // will make them all NULL, which is no replacement. The caller would then

588 // override the compoents they want to replace.	589 // override the components they want to replace.

589 URLComponentSource()	590 URLComponentSource()

590 : scheme(NULL),	591 : scheme(NULL),

591 username(NULL),	592 username(NULL),

592 password(NULL),	593 password(NULL),

593 host(NULL),	594 host(NULL),

594 port(NULL),	595 port(NULL),

595 path(NULL),	596 path(NULL),

596 query(NULL),	597 query(NULL),

597 ref(NULL) {	598 ref(NULL) {

598 }	599 }

(...skipping 143 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
742 // Replace component \| (replacement string) (replacement component)	743 // Replace component \| (replacement string) (replacement component)

743 // Delete component \| (non-NULL) (invalid component: (0,-1))	744 // Delete component \| (non-NULL) (invalid component: (0,-1))

744 //	745 //

745 // We use a pointer to the empty string for the source when the component	746 // We use a pointer to the empty string for the source when the component

746 // should be deleted.	747 // should be deleted.

747 URLComponentSource<CHAR> sources_;	748 URLComponentSource<CHAR> sources_;

748 url_parse::Parsed components_;	749 url_parse::Parsed components_;

749 };	750 };

750	751

751 // The base must be an 8-bit canonical URL.	752 // The base must be an 8-bit canonical URL.

752 bool ReplaceStandardURL(const char* base,	753 GURL_API bool ReplaceStandardURL(const char* base,

753 const url_parse::Parsed& base_parsed,	754 const url_parse::Parsed& base_parsed,

754 const Replacements<char>& replacements,	755 const Replacements<char>& replacements,

755 CharsetConverter* query_converter,	756 CharsetConverter* query_converter,

756 CanonOutput* output,	757 CanonOutput* output,

757 url_parse::Parsed* new_parsed);	758 url_parse::Parsed* new_parsed);

758 bool ReplaceStandardURL(const char* base,	759 GURL_API bool ReplaceStandardURL(const char* base,

759 const url_parse::Parsed& base_parsed,	760 const url_parse::Parsed& base_parsed,

760 const Replacements<char16>& replacements,	761 const Replacements<char16>& replacements,

761 CharsetConverter* query_converter,	762 CharsetConverter* query_converter,

762 CanonOutput* output,	763 CanonOutput* output,

763 url_parse::Parsed* new_parsed);	764 url_parse::Parsed* new_parsed);

764	765

765 // Replacing some parts of a file URL is not permitted. Everything except	766 // Replacing some parts of a file URL is not permitted. Everything except

766 // the host, path, query, and ref will be ignored.	767 // the host, path, query, and ref will be ignored.

767 bool ReplaceFileURL(const char* base,	768 GURL_API bool ReplaceFileURL(const char* base,

768 const url_parse::Parsed& base_parsed,	769 const url_parse::Parsed& base_parsed,

769 const Replacements<char>& replacements,	770 const Replacements<char>& replacements,

770 CharsetConverter* query_converter,	771 CharsetConverter* query_converter,

771 CanonOutput* output,	772 CanonOutput* output,

772 url_parse::Parsed* new_parsed);	773 url_parse::Parsed* new_parsed);

773 bool ReplaceFileURL(const char* base,	774 GURL_API bool ReplaceFileURL(const char* base,

774 const url_parse::Parsed& base_parsed,	775 const url_parse::Parsed& base_parsed,

775 const Replacements<char16>& replacements,	776 const Replacements<char16>& replacements,

776 CharsetConverter* query_converter,	777 CharsetConverter* query_converter,

777 CanonOutput* output,	778 CanonOutput* output,

778 url_parse::Parsed* new_parsed);	779 url_parse::Parsed* new_parsed);

779	780

780 // Path URLs can only have the scheme and path replaced. All other components	781 // Path URLs can only have the scheme and path replaced. All other components

781 // will be ignored.	782 // will be ignored.

782 bool ReplacePathURL(const char* base,	783 GURL_API bool ReplacePathURL(const char* base,

783 const url_parse::Parsed& base_parsed,	784 const url_parse::Parsed& base_parsed,

784 const Replacements<char>& replacements,	785 const Replacements<char>& replacements,

785 CanonOutput* output,	786 CanonOutput* output,

786 url_parse::Parsed* new_parsed);	787 url_parse::Parsed* new_parsed);

787 bool ReplacePathURL(const char* base,	788 GURL_API bool ReplacePathURL(const char* base,

788 const url_parse::Parsed& base_parsed,	789 const url_parse::Parsed& base_parsed,

789 const Replacements<char16>& replacements,	790 const Replacements<char16>& replacements,

790 CanonOutput* output,	791 CanonOutput* output,

791 url_parse::Parsed* new_parsed);	792 url_parse::Parsed* new_parsed);

792	793

793 // Mailto URLs can only have the scheme, path, and query replaced.	794 // Mailto URLs can only have the scheme, path, and query replaced.

794 // All other components will be ignored.	795 // All other components will be ignored.

795 bool ReplaceMailtoURL(const char* base,	796 GURL_API bool ReplaceMailtoURL(const char* base,

796 const url_parse::Parsed& base_parsed,	797 const url_parse::Parsed& base_parsed,

797 const Replacements<char>& replacements,	798 const Replacements<char>& replacements,

798 CanonOutput* output,	799 CanonOutput* output,

799 url_parse::Parsed* new_parsed);	800 url_parse::Parsed* new_parsed);

800 bool ReplaceMailtoURL(const char* base,	801 GURL_API bool ReplaceMailtoURL(const char* base,

801 const url_parse::Parsed& base_parsed,	802 const url_parse::Parsed& base_parsed,

802 const Replacements<char16>& replacements,	803 const Replacements<char16>& replacements,

803 CanonOutput* output,	804 CanonOutput* output,

804 url_parse::Parsed* new_parsed);	805 url_parse::Parsed* new_parsed);

805	806

806 // Relative URL ---------------------------------------------------------------	807 // Relative URL ---------------------------------------------------------------

807	808

808 // Given an input URL or URL fragment \|fragment\|, determines if it is a	809 // Given an input URL or URL fragment \|fragment\|, determines if it is a

809 // relative or absolute URL and places the result into \|*is_relative\|. If it is	810 // relative or absolute URL and places the result into \|*is_relative\|. If it is

810 // relative, the relevant portion of the URL will be placed into	811 // relative, the relevant portion of the URL will be placed into

811 // \|*relative_component\| (there may have been trimmed whitespace, for example).	812 // \|*relative_component\| (there may have been trimmed whitespace, for example).

812 // This value is passed to ResolveRelativeURL. If the input is not relative,	813 // This value is passed to ResolveRelativeURL. If the input is not relative,

813 // this value is UNDEFINED (it may be changed by the function).	814 // this value is UNDEFINED (it may be changed by the function).

814 //	815 //

815 // Returns true on success (we successfully determined the URL is relative or	816 // Returns true on success (we successfully determined the URL is relative or

816 // not). Failure means that the combination of URLs doesn't make any sense.	817 // not). Failure means that the combination of URLs doesn't make any sense.

817 //	818 //

818 // The base URL should always be canonical, therefore is ASCII.	819 // The base URL should always be canonical, therefore is ASCII.

819 bool IsRelativeURL(const char* base,	820 GURL_API bool IsRelativeURL(const char* base,

820 const url_parse::Parsed& base_parsed,	821 const url_parse::Parsed& base_parsed,

821 const char* fragment,	822 const char* fragment,

822 int fragment_len,	823 int fragment_len,

823 bool is_base_hierarchical,	824 bool is_base_hierarchical,

824 bool* is_relative,	825 bool* is_relative,

825 url_parse::Component* relative_component);	826 url_parse::Component* relative_component);

826 bool IsRelativeURL(const char* base,	827 GURL_API bool IsRelativeURL(const char* base,

827 const url_parse::Parsed& base_parsed,	828 const url_parse::Parsed& base_parsed,

828 const char16* fragment,	829 const char16* fragment,

829 int fragment_len,	830 int fragment_len,

830 bool is_base_hierarchical,	831 bool is_base_hierarchical,

831 bool* is_relative,	832 bool* is_relative,

832 url_parse::Component* relative_component);	833 url_parse::Component* relative_component);

833	834

834 // Given a canonical parsed source URL, a URL fragment known to be relative,	835 // Given a canonical parsed source URL, a URL fragment known to be relative,

835 // and the identified relevant portion of the relative URL (computed by	836 // and the identified relevant portion of the relative URL (computed by

836 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and	837 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and

837 // \|out_parsed\|.	838 // \|out_parsed\|.

838 //	839 //

839 // It also requires a flag indicating whether the base URL is a file: URL	840 // It also requires a flag indicating whether the base URL is a file: URL

840 // which triggers additional logic.	841 // which triggers additional logic.

841 //	842 //

842 // The base URL should be canonical and have a host (may be empty for file	843 // The base URL should be canonical and have a host (may be empty for file

843 // URLs) and a path. If it doesn't have these, we can't resolve relative	844 // URLs) and a path. If it doesn't have these, we can't resolve relative

844 // URLs off of it and will return the base as the output with an error flag.	845 // URLs off of it and will return the base as the output with an error flag.

845 // Because it is canonical, it should also be ASCII.	846 // Because it is canonical, it should also be ASCII.

846 //	847 //

847 // The query charset converter follows the same rules as CanonicalizeQuery.	848 // The query charset converter follows the same rules as CanonicalizeQuery.

848 //	849 //

849 // Returns true on success. On failure, the output will be "something	850 // Returns true on success. On failure, the output will be "something

850 // reasonable" that will be consistent and valid, just probably not what	851 // reasonable" that will be consistent and valid, just probably not what

851 // was intended by the web page author or caller.	852 // was intended by the web page author or caller.

852 bool ResolveRelativeURL(const char* base_url,	853 GURL_API bool ResolveRelativeURL(const char* base_url,

853 const url_parse::Parsed& base_parsed,	854 const url_parse::Parsed& base_parsed,

854 bool base_is_file,	855 bool base_is_file,

855 const char* relative_url,	856 const char* relative_url,

856 const url_parse::Component& relative_component,	857 const url_parse::Component& relative_component,

857 CharsetConverter* query_converter,	858 CharsetConverter* query_converter,

858 CanonOutput* output,	859 CanonOutput* output,

859 url_parse::Parsed* out_parsed);	860 url_parse::Parsed* out_parsed);

860 bool ResolveRelativeURL(const char* base_url,	861 GURL_API bool ResolveRelativeURL(const char* base_url,

861 const url_parse::Parsed& base_parsed,	862 const url_parse::Parsed& base_parsed,

862 bool base_is_file,	863 bool base_is_file,

863 const char16* relative_url,	864 const char16* relative_url,

864 const url_parse::Component& relative_component,	865 const url_parse::Component& relative_component,

865 CharsetConverter* query_converter,	866 CharsetConverter* query_converter,

866 CanonOutput* output,	867 CanonOutput* output,

867 url_parse::Parsed* out_parsed);	868 url_parse::Parsed* out_parsed);

868	869

869 } // namespace url_canon	870 } // namespace url_canon

870	871

871 #endif // GOOGLEURL_SRC_URL_CANON_H__	872 #endif // GOOGLEURL_SRC_URL_CANON_H__

OLD	NEW

« no previous file with comments | « src/gurl.h ('k') | src/url_canon_icu.h » ('j') | src/url_common.h » ('J')