trunk/src/url/url_canon.h - Issue 15848009: Revert 203025 "Make the copy of GURL in src/url buildable as a c..."

Side by Side Diff: trunk/src/url/url_canon.h

Issue 15848009: Revert 203025 "Make the copy of GURL in src/url buildable as a c..." (Closed) Base URL: svn://svn.chromium.org/chrome/

Patch Set: Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef URL_URL_CANON_H_	5 #ifndef URL_URL_CANON_H_

6 #define URL_URL_CANON_H_	6 #define URL_URL_CANON_H_

7	7

8 #include <stdlib.h>	8 #include <stdlib.h>

9 #include <string.h>	9 #include <string.h>

10	10

11 #include "base/string16.h"	11 #include "base/string16.h"

12 #include "url/url_export.h"

13 #include "url/url_parse.h"	12 #include "url/url_parse.h"

14	13

15 namespace url_canon {	14 namespace url_canon {

16	15

17 // Canonicalizer output -------------------------------------------------------	16 // Canonicalizer output -------------------------------------------------------

18	17

19 // Base class for the canonicalizer output, this maintains a buffer and	18 // Base class for the canonicalizer output, this maintains a buffer and

20 // supports simple resizing and append operations on it.	19 // supports simple resizing and append operations on it.

21 //	20 //

22 // It is VERY IMPORTANT that no virtual function calls be made on the common	21 // It is VERY IMPORTANT that no virtual function calls be made on the common

(...skipping 157 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
180 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};	179 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};

181	180

182 // Character set converter ----------------------------------------------------	181 // Character set converter ----------------------------------------------------

183 //	182 //

184 // Converts query strings into a custom encoding. The embedder can supply an	183 // Converts query strings into a custom encoding. The embedder can supply an

185 // implementation of this class to interface with their own character set	184 // implementation of this class to interface with their own character set

186 // conversion libraries.	185 // conversion libraries.

187 //	186 //

188 // Embedders will want to see the unit test for the ICU version.	187 // Embedders will want to see the unit test for the ICU version.

189	188

190 class URL_EXPORT CharsetConverter {	189 class CharsetConverter {

191 public:	190 public:

192 CharsetConverter() {}	191 CharsetConverter() {}

193 virtual ~CharsetConverter() {}	192 virtual ~CharsetConverter() {}

194	193

195 // Converts the given input string from UTF-16 to whatever output format the	194 // Converts the given input string from UTF-16 to whatever output format the

196 // converter supports. This is used only for the query encoding conversion,	195 // converter supports. This is used only for the query encoding conversion,

197 // which does not fail. Instead, the converter should insert "invalid	196 // which does not fail. Instead, the converter should insert "invalid

198 // character" characters in the output for invalid sequences, and do the	197 // character" characters in the output for invalid sequences, and do the

199 // best it can.	198 // best it can.

200 //	199 //

(...skipping 17 matching lines...) Expand all Loading...
218 // This should be called before parsing if whitespace removal is desired (which	217 // This should be called before parsing if whitespace removal is desired (which

219 // it normally is when you are canonicalizing).	218 // it normally is when you are canonicalizing).

220 //	219 //

221 // If no whitespace is removed, this function will not use the buffer and will	220 // If no whitespace is removed, this function will not use the buffer and will

222 // return a pointer to the input, to avoid the extra copy. If modification is	221 // return a pointer to the input, to avoid the extra copy. If modification is

223 // required, the given \|buffer\| will be used and the returned pointer will	222 // required, the given \|buffer\| will be used and the returned pointer will

224 // point to the beginning of the buffer.	223 // point to the beginning of the buffer.

225 //	224 //

226 // Therefore, callers should not use the buffer, since it may actually be empty,	225 // Therefore, callers should not use the buffer, since it may actually be empty,

227 // use the computed pointer and \|*output_len\| instead.	226 // use the computed pointer and \|*output_len\| instead.

228 URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len,	227 const char* RemoveURLWhitespace(const char* input, int input_len,

229 CanonOutputT<char>* buffer,	228 CanonOutputT<char>* buffer,

230 int* output_len);	229 int* output_len);

231 URL_EXPORT const char16* RemoveURLWhitespace(const char16* input, int input_len,	230 const char16* RemoveURLWhitespace(const char16* input, int input_len,

232 CanonOutputT<char16>* buffer,	231 CanonOutputT<char16>* buffer,

233 int* output_len);	232 int* output_len);

234	233

235 // IDN ------------------------------------------------------------------------	234 // IDN ------------------------------------------------------------------------

236	235

237 // Converts the Unicode input representing a hostname to ASCII using IDN rules.	236 // Converts the Unicode input representing a hostname to ASCII using IDN rules.

238 // The output must fall in the ASCII range, but will be encoded in UTF-16.	237 // The output must fall in the ASCII range, but will be encoded in UTF-16.

239 //	238 //

240 // On success, the output will be filled with the ASCII host name and it will	239 // On success, the output will be filled with the ASCII host name and it will

241 // return true. Unlike most other canonicalization functions, this assumes that	240 // return true. Unlike most other canonicalization functions, this assumes that

242 // the output is empty. The beginning of the host will be at offset 0, and	241 // the output is empty. The beginning of the host will be at offset 0, and

243 // the length of the output will be set to the length of the new host name.	242 // the length of the output will be set to the length of the new host name.

244 //	243 //

245 // On error, returns false. The output in this case is undefined.	244 // On error, returns false. The output in this case is undefined.

246 URL_EXPORT bool IDNToASCII(const char16* src,	245 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);

247 int src_len,

248 CanonOutputW* output);

249	246

250 // Piece-by-piece canonicalizers ----------------------------------------------	247 // Piece-by-piece canonicalizers ----------------------------------------------

251 //	248 //

252 // These individual canonicalizers append the canonicalized versions of the	249 // These individual canonicalizers append the canonicalized versions of the

253 // corresponding URL component to the given std::string. The spec and the	250 // corresponding URL component to the given std::string. The spec and the

254 // previously-identified range of that component are the input. The range of	251 // previously-identified range of that component are the input. The range of

255 // the canonicalized component will be written to the output component.	252 // the canonicalized component will be written to the output component.

256 //	253 //

257 // These functions all append to the output so they can be chained. Make sure	254 // These functions all append to the output so they can be chained. Make sure

258 // the output is empty when you start.	255 // the output is empty when you start.

259 //	256 //

260 // These functions return boolean values indicating success. On failure, they	257 // These functions return boolean values indicating success. On failure, they

261 // will attempt to write something reasonable to the output so that, if	258 // will attempt to write something reasonable to the output so that, if

262 // displayed to the user, they will recognise it as something that's messed up.	259 // displayed to the user, they will recognise it as something that's messed up.

263 // Nothing more should ever be done with these invalid URLs, however.	260 // Nothing more should ever be done with these invalid URLs, however.

264	261

265 // Scheme: Appends the scheme and colon to the URL. The output component will	262 // Scheme: Appends the scheme and colon to the URL. The output component will

266 // indicate the range of characters up to but not including the colon.	263 // indicate the range of characters up to but not including the colon.

267 //	264 //

268 // Canonical URLs always have a scheme. If the scheme is not present in the	265 // Canonical URLs always have a scheme. If the scheme is not present in the

269 // input, this will just write the colon to indicate an empty scheme. Does not	266 // input, this will just write the colon to indicate an empty scheme. Does not

270 // append slashes which will be needed before any authority components for most	267 // append slashes which will be needed before any authority components for most

271 // URLs.	268 // URLs.

272 //	269 //

273 // The 8-bit version requires UTF-8 encoding.	270 // The 8-bit version requires UTF-8 encoding.

274 URL_EXPORT bool CanonicalizeScheme(const char* spec,	271 bool CanonicalizeScheme(const char* spec,

275 const url_parse::Component& scheme,	272 const url_parse::Component& scheme,

276 CanonOutput* output,	273 CanonOutput* output,

277 url_parse::Component* out_scheme);	274 url_parse::Component* out_scheme);

278 URL_EXPORT bool CanonicalizeScheme(const char16* spec,	275 bool CanonicalizeScheme(const char16* spec,

279 const url_parse::Component& scheme,	276 const url_parse::Component& scheme,

280 CanonOutput* output,	277 CanonOutput* output,

281 url_parse::Component* out_scheme);	278 url_parse::Component* out_scheme);

282	279

283 // User info: username/password. If present, this will add the delimiters so	280 // User info: username/password. If present, this will add the delimiters so

284 // the output will be "<username>:<password>@" or "<username>@". Empty	281 // the output will be "<username>:<password>@" or "<username>@". Empty

285 // username/password pairs, or empty passwords, will get converted to	282 // username/password pairs, or empty passwords, will get converted to

286 // nonexistent in the canonical version.	283 // nonexistent in the canonical version.

287 //	284 //

288 // The components for the username and password refer to ranges in the	285 // The components for the username and password refer to ranges in the

289 // respective source strings. Usually, these will be the same string, which	286 // respective source strings. Usually, these will be the same string, which

290 // is legal as long as the two components don't overlap.	287 // is legal as long as the two components don't overlap.

291 //	288 //

292 // The 8-bit version requires UTF-8 encoding.	289 // The 8-bit version requires UTF-8 encoding.

293 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,	290 bool CanonicalizeUserInfo(const char* username_source,

294 const url_parse::Component& username,	291 const url_parse::Component& username,

295 const char* password_source,	292 const char* password_source,

296 const url_parse::Component& password,	293 const url_parse::Component& password,

297 CanonOutput* output,	294 CanonOutput* output,

298 url_parse::Component* out_username,	295 url_parse::Component* out_username,

299 url_parse::Component* out_password);	296 url_parse::Component* out_password);

300 URL_EXPORT bool CanonicalizeUserInfo(const char16* username_source,	297 bool CanonicalizeUserInfo(const char16* username_source,

301 const url_parse::Component& username,	298 const url_parse::Component& username,

302 const char16* password_source,	299 const char16* password_source,

303 const url_parse::Component& password,	300 const url_parse::Component& password,

304 CanonOutput* output,	301 CanonOutput* output,

305 url_parse::Component* out_username,	302 url_parse::Component* out_username,

306 url_parse::Component* out_password);	303 url_parse::Component* out_password);

307	304

308	305

309 // This structure holds detailed state exported from the IP/Host canonicalizers.	306 // This structure holds detailed state exported from the IP/Host canonicalizers.

310 // Additional fields may be added as callers require them.	307 // Additional fields may be added as callers require them.

311 struct CanonHostInfo {	308 struct CanonHostInfo {

312 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}	309 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}

313	310

314 // Convenience function to test if family is an IP address.	311 // Convenience function to test if family is an IP address.

315 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }	312 bool IsIPAddress() const { return family == IPV4 \|\| family == IPV6; }

316	313

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
349 int AddressLength() const {	346 int AddressLength() const {

350 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);	347 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);

351 }	348 }

352 };	349 };

353	350

354	351

355 // Host.	352 // Host.

356 //	353 //

357 // The 8-bit version requires UTF-8 encoding. Use this version when you only	354 // The 8-bit version requires UTF-8 encoding. Use this version when you only

358 // need to know whether canonicalization succeeded.	355 // need to know whether canonicalization succeeded.

359 URL_EXPORT bool CanonicalizeHost(const char* spec,	356 bool CanonicalizeHost(const char* spec,

360 const url_parse::Component& host,	357 const url_parse::Component& host,

361 CanonOutput* output,	358 CanonOutput* output,

362 url_parse::Component* out_host);	359 url_parse::Component* out_host);

363 URL_EXPORT bool CanonicalizeHost(const char16* spec,	360 bool CanonicalizeHost(const char16* spec,

364 const url_parse::Component& host,	361 const url_parse::Component& host,

365 CanonOutput* output,	362 CanonOutput* output,

366 url_parse::Component* out_host);	363 url_parse::Component* out_host);

367	364

368 // Extended version of CanonicalizeHost, which returns additional information.	365 // Extended version of CanonicalizeHost, which returns additional information.

369 // Use this when you need to know whether the hostname was an IP address.	366 // Use this when you need to know whether the hostname was an IP address.

370 // A successful return is indicated by host_info->family != BROKEN. See the	367 // A successful return is indicated by host_info->family != BROKEN. See the

371 // definition of CanonHostInfo above for details.	368 // definition of CanonHostInfo above for details.

372 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,	369 void CanonicalizeHostVerbose(const char* spec,

373 const url_parse::Component& host,	370 const url_parse::Component& host,

374 CanonOutput* output,	371 CanonOutput* output,

375 CanonHostInfo* host_info);	372 CanonHostInfo* host_info);

376 URL_EXPORT void CanonicalizeHostVerbose(const char16* spec,	373 void CanonicalizeHostVerbose(const char16* spec,

377 const url_parse::Component& host,	374 const url_parse::Component& host,

378 CanonOutput* output,	375 CanonOutput* output,

379 CanonHostInfo* host_info);	376 CanonHostInfo* host_info);

380	377

381	378

382 // IP addresses.	379 // IP addresses.

383 //	380 //

384 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is	381 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is

385 // an IP address, it will canonicalize it as such, appending it to \|output\|.	382 // an IP address, it will canonicalize it as such, appending it to \|output\|.

386 // Additional status information is returned via the \|*host_info\| parameter.	383 // Additional status information is returned via the \|*host_info\| parameter.

387 // See the definition of CanonHostInfo above for details.	384 // See the definition of CanonHostInfo above for details.

388 //	385 //

389 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that	386 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that

390 // the input is unescaped and name-prepped, etc. It should not normally be	387 // the input is unescaped and name-prepped, etc. It should not normally be

391 // necessary or wise to call this directly.	388 // necessary or wise to call this directly.

392 URL_EXPORT void CanonicalizeIPAddress(const char* spec,	389 void CanonicalizeIPAddress(const char* spec,

393 const url_parse::Component& host,	390 const url_parse::Component& host,

394 CanonOutput* output,	391 CanonOutput* output,

395 CanonHostInfo* host_info);	392 CanonHostInfo* host_info);

396 URL_EXPORT void CanonicalizeIPAddress(const char16* spec,	393 void CanonicalizeIPAddress(const char16* spec,

397 const url_parse::Component& host,	394 const url_parse::Component& host,

398 CanonOutput* output,	395 CanonOutput* output,

399 CanonHostInfo* host_info);	396 CanonHostInfo* host_info);

400	397

401 // Port: this function will add the colon for the port if a port is present.	398 // Port: this function will add the colon for the port if a port is present.

402 // The caller can pass url_parse::PORT_UNSPECIFIED as the	399 // The caller can pass url_parse::PORT_UNSPECIFIED as the

403 // default_port_for_scheme argument if there is no default port.	400 // default_port_for_scheme argument if there is no default port.

404 //	401 //

405 // The 8-bit version requires UTF-8 encoding.	402 // The 8-bit version requires UTF-8 encoding.

406 URL_EXPORT bool CanonicalizePort(const char* spec,	403 bool CanonicalizePort(const char* spec,

407 const url_parse::Component& port,	404 const url_parse::Component& port,

408 int default_port_for_scheme,	405 int default_port_for_scheme,

409 CanonOutput* output,	406 CanonOutput* output,

410 url_parse::Component* out_port);	407 url_parse::Component* out_port);

411 URL_EXPORT bool CanonicalizePort(const char16* spec,	408 bool CanonicalizePort(const char16* spec,

412 const url_parse::Component& port,	409 const url_parse::Component& port,

413 int default_port_for_scheme,	410 int default_port_for_scheme,

414 CanonOutput* output,	411 CanonOutput* output,

415 url_parse::Component* out_port);	412 url_parse::Component* out_port);

416	413

417 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED	414 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED

418 // if the scheme is unknown.	415 // if the scheme is unknown.

419 URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);	416 int DefaultPortForScheme(const char* scheme, int scheme_len);

420	417

421 // Path. If the input does not begin in a slash (including if the input is	418 // Path. If the input does not begin in a slash (including if the input is

422 // empty), we'll prepend a slash to the path to make it canonical.	419 // empty), we'll prepend a slash to the path to make it canonical.

423 //	420 //

424 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity	421 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity

425 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid	422 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid

426 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't	423 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't

427 // an issue. Somebody giving us an 8-bit path is responsible for generating	424 // an issue. Somebody giving us an 8-bit path is responsible for generating

428 // the path that the server expects (we'll escape high-bit characters), so	425 // the path that the server expects (we'll escape high-bit characters), so

429 // if something is invalid, it's their problem.	426 // if something is invalid, it's their problem.

430 URL_EXPORT bool CanonicalizePath(const char* spec,	427 bool CanonicalizePath(const char* spec,

431 const url_parse::Component& path,	428 const url_parse::Component& path,

432 CanonOutput* output,	429 CanonOutput* output,

433 url_parse::Component* out_path);	430 url_parse::Component* out_path);

434 URL_EXPORT bool CanonicalizePath(const char16* spec,	431 bool CanonicalizePath(const char16* spec,

435 const url_parse::Component& path,	432 const url_parse::Component& path,

436 CanonOutput* output,	433 CanonOutput* output,

437 url_parse::Component* out_path);	434 url_parse::Component* out_path);

438	435

439 // Canonicalizes the input as a file path. This is like CanonicalizePath except	436 // Canonicalizes the input as a file path. This is like CanonicalizePath except

440 // that it also handles Windows drive specs. For example, the path can begin	437 // that it also handles Windows drive specs. For example, the path can begin

441 // with "c\|\" and it will get properly canonicalized to "C:/".	438 // with "c\|\" and it will get properly canonicalized to "C:/".

442 // The string will be appended to \|output\| and \|out_path\| will be updated.	439 // The string will be appended to \|output\| and \|out_path\| will be updated.

443 //	440 //

444 // The 8-bit version requires UTF-8 encoding.	441 // The 8-bit version requires UTF-8 encoding.

445 URL_EXPORT bool FileCanonicalizePath(const char* spec,	442 bool FileCanonicalizePath(const char* spec,

446 const url_parse::Component& path,	443 const url_parse::Component& path,

447 CanonOutput* output,	444 CanonOutput* output,

448 url_parse::Component* out_path);	445 url_parse::Component* out_path);

449 URL_EXPORT bool FileCanonicalizePath(const char16* spec,	446 bool FileCanonicalizePath(const char16* spec,

450 const url_parse::Component& path,	447 const url_parse::Component& path,

451 CanonOutput* output,	448 CanonOutput* output,

452 url_parse::Component* out_path);	449 url_parse::Component* out_path);

453	450

454 // Query: Prepends the ? if needed.	451 // Query: Prepends the ? if needed.

455 //	452 //

456 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly	453 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly

457 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode	454 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode

458 // "invalid character." This function cannot fail; we always just try to do	455 // "invalid character." This function cannot fail; we always just try to do

459 // our best for crazy input here since web pages can set it themselves.	456 // our best for crazy input here since web pages can set it themselves.

460 //	457 //

461 // This will convert the given input into the output encoding that the given	458 // This will convert the given input into the output encoding that the given

462 // character set converter object provides. The converter will only be called	459 // character set converter object provides. The converter will only be called

463 // if necessary, for ASCII input, no conversions are necessary.	460 // if necessary, for ASCII input, no conversions are necessary.

464 //	461 //

465 // The converter can be NULL. In this case, the output encoding will be UTF-8.	462 // The converter can be NULL. In this case, the output encoding will be UTF-8.

466 URL_EXPORT void CanonicalizeQuery(const char* spec,	463 void CanonicalizeQuery(const char* spec,

467 const url_parse::Component& query,	464 const url_parse::Component& query,

468 CharsetConverter* converter,	465 CharsetConverter* converter,

469 CanonOutput* output,	466 CanonOutput* output,

470 url_parse::Component* out_query);	467 url_parse::Component* out_query);

471 URL_EXPORT void CanonicalizeQuery(const char16* spec,	468 void CanonicalizeQuery(const char16* spec,

472 const url_parse::Component& query,	469 const url_parse::Component& query,

473 CharsetConverter* converter,	470 CharsetConverter* converter,

474 CanonOutput* output,	471 CanonOutput* output,

475 url_parse::Component* out_query);	472 url_parse::Component* out_query);

476	473

477 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only	474 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only

478 // canonicalizer that does not produce ASCII output). The output is	475 // canonicalizer that does not produce ASCII output). The output is

479 // guaranteed to be valid UTF-8.	476 // guaranteed to be valid UTF-8.

480 //	477 //

481 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use	478 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use

482 // the "Unicode replacement character" for the confusing bits and copy the rest.	479 // the "Unicode replacement character" for the confusing bits and copy the rest.

483 URL_EXPORT void CanonicalizeRef(const char* spec,	480 void CanonicalizeRef(const char* spec,

484 const url_parse::Component& path,	481 const url_parse::Component& path,

485 CanonOutput* output,	482 CanonOutput* output,

486 url_parse::Component* out_path);	483 url_parse::Component* out_path);

487 URL_EXPORT void CanonicalizeRef(const char16* spec,	484 void CanonicalizeRef(const char16* spec,

488 const url_parse::Component& path,	485 const url_parse::Component& path,

489 CanonOutput* output,	486 CanonOutput* output,

490 url_parse::Component* out_path);	487 url_parse::Component* out_path);

491	488

492 // Full canonicalizer ---------------------------------------------------------	489 // Full canonicalizer ---------------------------------------------------------

493 //	490 //

494 // These functions replace any string contents, rather than append as above.	491 // These functions replace any string contents, rather than append as above.

495 // See the above piece-by-piece functions for information specific to	492 // See the above piece-by-piece functions for information specific to

496 // canonicalizing individual components.	493 // canonicalizing individual components.

497 //	494 //

498 // The output will be ASCII except the reference fragment, which may be UTF-8.	495 // The output will be ASCII except the reference fragment, which may be UTF-8.

499 //	496 //

500 // The 8-bit versions require UTF-8 encoding.	497 // The 8-bit versions require UTF-8 encoding.

501	498

502 // Use for standard URLs with authorities and paths.	499 // Use for standard URLs with authorities and paths.

503 URL_EXPORT bool CanonicalizeStandardURL(const char* spec,	500 bool CanonicalizeStandardURL(const char* spec,

504 int spec_len,	501 int spec_len,

505 const url_parse::Parsed& parsed,	502 const url_parse::Parsed& parsed,

506 CharsetConverter* query_converter,	503 CharsetConverter* query_converter,

507 CanonOutput* output,	504 CanonOutput* output,

508 url_parse::Parsed* new_parsed);	505 url_parse::Parsed* new_parsed);

509 URL_EXPORT bool CanonicalizeStandardURL(const char16* spec,	506 bool CanonicalizeStandardURL(const char16* spec,

510 int spec_len,	507 int spec_len,

511 const url_parse::Parsed& parsed,	508 const url_parse::Parsed& parsed,

512 CharsetConverter* query_converter,	509 CharsetConverter* query_converter,

513 CanonOutput* output,	510 CanonOutput* output,

514 url_parse::Parsed* new_parsed);	511 url_parse::Parsed* new_parsed);

515	512

516 // Use for file URLs.	513 // Use for file URLs.

517 URL_EXPORT bool CanonicalizeFileURL(const char* spec,	514 bool CanonicalizeFileURL(const char* spec,

518 int spec_len,	515 int spec_len,

519 const url_parse::Parsed& parsed,	516 const url_parse::Parsed& parsed,

520 CharsetConverter* query_converter,	517 CharsetConverter* query_converter,

521 CanonOutput* output,	518 CanonOutput* output,

522 url_parse::Parsed* new_parsed);	519 url_parse::Parsed* new_parsed);

523 URL_EXPORT bool CanonicalizeFileURL(const char16* spec,	520 bool CanonicalizeFileURL(const char16* spec,

524 int spec_len,	521 int spec_len,

525 const url_parse::Parsed& parsed,	522 const url_parse::Parsed& parsed,

526 CharsetConverter* query_converter,	523 CharsetConverter* query_converter,

527 CanonOutput* output,	524 CanonOutput* output,

528 url_parse::Parsed* new_parsed);	525 url_parse::Parsed* new_parsed);

529	526

530 // Use for filesystem URLs.	527 // Use for filesystem URLs.

531 URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,	528 bool CanonicalizeFileSystemURL(const char* spec,

532 int spec_len,	529 int spec_len,

533 const url_parse::Parsed& parsed,	530 const url_parse::Parsed& parsed,

534 CharsetConverter* query_converter,	531 CharsetConverter* query_converter,

535 CanonOutput* output,	532 CanonOutput* output,

536 url_parse::Parsed* new_parsed);	533 url_parse::Parsed* new_parsed);

537 URL_EXPORT bool CanonicalizeFileSystemURL(const char16* spec,	534 bool CanonicalizeFileSystemURL(const char16* spec,

538 int spec_len,	535 int spec_len,

539 const url_parse::Parsed& parsed,	536 const url_parse::Parsed& parsed,

540 CharsetConverter* query_converter,	537 CharsetConverter* query_converter,

541 CanonOutput* output,	538 CanonOutput* output,

542 url_parse::Parsed* new_parsed);	539 url_parse::Parsed* new_parsed);

543	540

544 // Use for path URLs such as javascript. This does not modify the path in any	541 // Use for path URLs such as javascript. This does not modify the path in any

545 // way, for example, by escaping it.	542 // way, for example, by escaping it.

546 URL_EXPORT bool CanonicalizePathURL(const char* spec,	543 bool CanonicalizePathURL(const char* spec,

547 int spec_len,	544 int spec_len,

548 const url_parse::Parsed& parsed,	545 const url_parse::Parsed& parsed,

549 CanonOutput* output,	546 CanonOutput* output,

550 url_parse::Parsed* new_parsed);	547 url_parse::Parsed* new_parsed);

551 URL_EXPORT bool CanonicalizePathURL(const char16* spec,	548 bool CanonicalizePathURL(const char16* spec,

552 int spec_len,	549 int spec_len,

553 const url_parse::Parsed& parsed,	550 const url_parse::Parsed& parsed,

554 CanonOutput* output,	551 CanonOutput* output,

555 url_parse::Parsed* new_parsed);	552 url_parse::Parsed* new_parsed);

556	553

557 // Use for mailto URLs. This "canonicalizes" the url into a path and query	554 // Use for mailto URLs. This "canonicalizes" the url into a path and query

558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for	555 // component. It does not attempt to merge "to" fields. It uses UTF-8 for

559 // the query encoding if there is a query. This is because a mailto URL is	556 // the query encoding if there is a query. This is because a mailto URL is

560 // really intended for an external mail program, and the encoding of a page,	557 // really intended for an external mail program, and the encoding of a page,

561 // etc. which would influence a query encoding normally are irrelevant.	558 // etc. which would influence a query encoding normally are irrelevant.

562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,	559 bool CanonicalizeMailtoURL(const char* spec,

563 int spec_len,	560 int spec_len,

564 const url_parse::Parsed& parsed,	561 const url_parse::Parsed& parsed,

565 CanonOutput* output,	562 CanonOutput* output,

566 url_parse::Parsed* new_parsed);	563 url_parse::Parsed* new_parsed);

567 URL_EXPORT bool CanonicalizeMailtoURL(const char16* spec,	564 bool CanonicalizeMailtoURL(const char16* spec,

568 int spec_len,	565 int spec_len,

569 const url_parse::Parsed& parsed,	566 const url_parse::Parsed& parsed,

570 CanonOutput* output,	567 CanonOutput* output,

571 url_parse::Parsed* new_parsed);	568 url_parse::Parsed* new_parsed);

572	569

573 // Part replacer --------------------------------------------------------------	570 // Part replacer --------------------------------------------------------------

574	571

575 // Internal structure used for storing separate strings for each component.	572 // Internal structure used for storing separate strings for each component.

576 // The basic canonicalization functions use this structure internally so that	573 // The basic canonicalization functions use this structure internally so that

577 // component replacement (different strings for different components) can be	574 // component replacement (different strings for different components) can be

578 // treated on the same code path as regular canonicalization (the same string	575 // treated on the same code path as regular canonicalization (the same string

579 // for each component).	576 // for each component).

580 //	577 //

581 // A url_parse::Parsed structure usually goes along with this. Those	578 // A url_parse::Parsed structure usually goes along with this. Those

(...skipping 164 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
746 // Replace component \| (replacement string) (replacement component)	743 // Replace component \| (replacement string) (replacement component)

747 // Delete component \| (non-NULL) (invalid component: (0,-1))	744 // Delete component \| (non-NULL) (invalid component: (0,-1))

748 //	745 //

749 // We use a pointer to the empty string for the source when the component	746 // We use a pointer to the empty string for the source when the component

750 // should be deleted.	747 // should be deleted.

751 URLComponentSource<CHAR> sources_;	748 URLComponentSource<CHAR> sources_;

752 url_parse::Parsed components_;	749 url_parse::Parsed components_;

753 };	750 };

754	751

755 // The base must be an 8-bit canonical URL.	752 // The base must be an 8-bit canonical URL.

756 URL_EXPORT bool ReplaceStandardURL(const char* base,	753 bool ReplaceStandardURL(const char* base,

757 const url_parse::Parsed& base_parsed,	754 const url_parse::Parsed& base_parsed,

758 const Replacements<char>& replacements,	755 const Replacements<char>& replacements,

759 CharsetConverter* query_converter,	756 CharsetConverter* query_converter,

760 CanonOutput* output,	757 CanonOutput* output,

761 url_parse::Parsed* new_parsed);	758 url_parse::Parsed* new_parsed);

762 URL_EXPORT bool ReplaceStandardURL(const char* base,	759 bool ReplaceStandardURL(const char* base,

763 const url_parse::Parsed& base_parsed,	760 const url_parse::Parsed& base_parsed,

764 const Replacements<char16>& replacements,	761 const Replacements<char16>& replacements,

765 CharsetConverter* query_converter,	762 CharsetConverter* query_converter,

766 CanonOutput* output,	763 CanonOutput* output,

767 url_parse::Parsed* new_parsed);	764 url_parse::Parsed* new_parsed);

768	765

769 // Filesystem URLs can only have the path, query, or ref replaced.	766 // Filesystem URLs can only have the path, query, or ref replaced.

770 // All other components will be ignored.	767 // All other components will be ignored.

771 URL_EXPORT bool ReplaceFileSystemURL(const char* base,	768 bool ReplaceFileSystemURL(const char* base,

772 const url_parse::Parsed& base_parsed,	769 const url_parse::Parsed& base_parsed,

773 const Replacements<char>& replacements,	770 const Replacements<char>& replacements,

774 CharsetConverter* query_converter,	771 CharsetConverter* query_converter,

775 CanonOutput* output,	772 CanonOutput* output,

776 url_parse::Parsed* new_parsed);	773 url_parse::Parsed* new_parsed);

777 URL_EXPORT bool ReplaceFileSystemURL(const char* base,	774 bool ReplaceFileSystemURL(const char* base,

778 const url_parse::Parsed& base_parsed,	775 const url_parse::Parsed& base_parsed,

779 const Replacements<char16>& replacements,	776 const Replacements<char16>& replacements,

780 CharsetConverter* query_converter,	777 CharsetConverter* query_converter,

781 CanonOutput* output,	778 CanonOutput* output,

782 url_parse::Parsed* new_parsed);	779 url_parse::Parsed* new_parsed);

783	780

784 // Replacing some parts of a file URL is not permitted. Everything except	781 // Replacing some parts of a file URL is not permitted. Everything except

785 // the host, path, query, and ref will be ignored.	782 // the host, path, query, and ref will be ignored.

786 URL_EXPORT bool ReplaceFileURL(const char* base,	783 bool ReplaceFileURL(const char* base,

787 const url_parse::Parsed& base_parsed,	784 const url_parse::Parsed& base_parsed,

788 const Replacements<char>& replacements,	785 const Replacements<char>& replacements,

789 CharsetConverter* query_converter,	786 CharsetConverter* query_converter,

790 CanonOutput* output,	787 CanonOutput* output,

791 url_parse::Parsed* new_parsed);	788 url_parse::Parsed* new_parsed);

792 URL_EXPORT bool ReplaceFileURL(const char* base,	789 bool ReplaceFileURL(const char* base,

793 const url_parse::Parsed& base_parsed,	790 const url_parse::Parsed& base_parsed,

794 const Replacements<char16>& replacements,	791 const Replacements<char16>& replacements,

795 CharsetConverter* query_converter,	792 CharsetConverter* query_converter,

796 CanonOutput* output,	793 CanonOutput* output,

797 url_parse::Parsed* new_parsed);	794 url_parse::Parsed* new_parsed);

798	795

799 // Path URLs can only have the scheme and path replaced. All other components	796 // Path URLs can only have the scheme and path replaced. All other components

800 // will be ignored.	797 // will be ignored.

801 URL_EXPORT bool ReplacePathURL(const char* base,	798 bool ReplacePathURL(const char* base,

802 const url_parse::Parsed& base_parsed,	799 const url_parse::Parsed& base_parsed,

803 const Replacements<char>& replacements,	800 const Replacements<char>& replacements,

804 CanonOutput* output,	801 CanonOutput* output,

805 url_parse::Parsed* new_parsed);	802 url_parse::Parsed* new_parsed);

806 URL_EXPORT bool ReplacePathURL(const char* base,	803 bool ReplacePathURL(const char* base,

807 const url_parse::Parsed& base_parsed,	804 const url_parse::Parsed& base_parsed,

808 const Replacements<char16>& replacements,	805 const Replacements<char16>& replacements,

809 CanonOutput* output,	806 CanonOutput* output,

810 url_parse::Parsed* new_parsed);	807 url_parse::Parsed* new_parsed);

811	808

812 // Mailto URLs can only have the scheme, path, and query replaced.	809 // Mailto URLs can only have the scheme, path, and query replaced.

813 // All other components will be ignored.	810 // All other components will be ignored.

814 URL_EXPORT bool ReplaceMailtoURL(const char* base,	811 bool ReplaceMailtoURL(const char* base,

815 const url_parse::Parsed& base_parsed,	812 const url_parse::Parsed& base_parsed,

816 const Replacements<char>& replacements,	813 const Replacements<char>& replacements,

817 CanonOutput* output,	814 CanonOutput* output,

818 url_parse::Parsed* new_parsed);	815 url_parse::Parsed* new_parsed);

819 URL_EXPORT bool ReplaceMailtoURL(const char* base,	816 bool ReplaceMailtoURL(const char* base,

820 const url_parse::Parsed& base_parsed,	817 const url_parse::Parsed& base_parsed,

821 const Replacements<char16>& replacements,	818 const Replacements<char16>& replacements,

822 CanonOutput* output,	819 CanonOutput* output,

823 url_parse::Parsed* new_parsed);	820 url_parse::Parsed* new_parsed);

824	821

825 // Relative URL ---------------------------------------------------------------	822 // Relative URL ---------------------------------------------------------------

826	823

827 // Given an input URL or URL fragment \|fragment\|, determines if it is a	824 // Given an input URL or URL fragment \|fragment\|, determines if it is a

828 // relative or absolute URL and places the result into \|*is_relative\|. If it is	825 // relative or absolute URL and places the result into \|*is_relative\|. If it is

829 // relative, the relevant portion of the URL will be placed into	826 // relative, the relevant portion of the URL will be placed into

830 // \|*relative_component\| (there may have been trimmed whitespace, for example).	827 // \|*relative_component\| (there may have been trimmed whitespace, for example).

831 // This value is passed to ResolveRelativeURL. If the input is not relative,	828 // This value is passed to ResolveRelativeURL. If the input is not relative,

832 // this value is UNDEFINED (it may be changed by the function).	829 // this value is UNDEFINED (it may be changed by the function).

833 //	830 //

834 // Returns true on success (we successfully determined the URL is relative or	831 // Returns true on success (we successfully determined the URL is relative or

835 // not). Failure means that the combination of URLs doesn't make any sense.	832 // not). Failure means that the combination of URLs doesn't make any sense.

836 //	833 //

837 // The base URL should always be canonical, therefore is ASCII.	834 // The base URL should always be canonical, therefore is ASCII.

838 URL_EXPORT bool IsRelativeURL(const char* base,	835 bool IsRelativeURL(const char* base,

839 const url_parse::Parsed& base_parsed,	836 const url_parse::Parsed& base_parsed,

840 const char* fragment,	837 const char* fragment,

841 int fragment_len,	838 int fragment_len,

842 bool is_base_hierarchical,	839 bool is_base_hierarchical,

843 bool* is_relative,	840 bool* is_relative,

844 url_parse::Component* relative_component);	841 url_parse::Component* relative_component);

845 URL_EXPORT bool IsRelativeURL(const char* base,	842 bool IsRelativeURL(const char* base,

846 const url_parse::Parsed& base_parsed,	843 const url_parse::Parsed& base_parsed,

847 const char16* fragment,	844 const char16* fragment,

848 int fragment_len,	845 int fragment_len,

849 bool is_base_hierarchical,	846 bool is_base_hierarchical,

850 bool* is_relative,	847 bool* is_relative,

851 url_parse::Component* relative_component);	848 url_parse::Component* relative_component);

852	849

853 // Given a canonical parsed source URL, a URL fragment known to be relative,	850 // Given a canonical parsed source URL, a URL fragment known to be relative,

854 // and the identified relevant portion of the relative URL (computed by	851 // and the identified relevant portion of the relative URL (computed by

855 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and	852 // IsRelativeURL), this produces a new parsed canonical URL in \|output\| and

856 // \|out_parsed\|.	853 // \|out_parsed\|.

857 //	854 //

858 // It also requires a flag indicating whether the base URL is a file: URL	855 // It also requires a flag indicating whether the base URL is a file: URL

859 // which triggers additional logic.	856 // which triggers additional logic.

860 //	857 //

861 // The base URL should be canonical and have a host (may be empty for file	858 // The base URL should be canonical and have a host (may be empty for file

862 // URLs) and a path. If it doesn't have these, we can't resolve relative	859 // URLs) and a path. If it doesn't have these, we can't resolve relative

863 // URLs off of it and will return the base as the output with an error flag.	860 // URLs off of it and will return the base as the output with an error flag.

864 // Because it is canonical, it should also be ASCII.	861 // Because it is canonical, it should also be ASCII.

865 //	862 //

866 // The query charset converter follows the same rules as CanonicalizeQuery.	863 // The query charset converter follows the same rules as CanonicalizeQuery.

867 //	864 //

868 // Returns true on success. On failure, the output will be "something	865 // Returns true on success. On failure, the output will be "something

869 // reasonable" that will be consistent and valid, just probably not what	866 // reasonable" that will be consistent and valid, just probably not what

870 // was intended by the web page author or caller.	867 // was intended by the web page author or caller.

871 URL_EXPORT bool ResolveRelativeURL(	868 bool ResolveRelativeURL(const char* base_url,

872 const char* base_url,	869 const url_parse::Parsed& base_parsed,

873 const url_parse::Parsed& base_parsed,	870 bool base_is_file,

874 bool base_is_file,	871 const char* relative_url,

875 const char* relative_url,	872 const url_parse::Component& relative_component,

876 const url_parse::Component& relative_component,	873 CharsetConverter* query_converter,

877 CharsetConverter* query_converter,	874 CanonOutput* output,

878 CanonOutput* output,	875 url_parse::Parsed* out_parsed);

879 url_parse::Parsed* out_parsed);	876 bool ResolveRelativeURL(const char* base_url,

880 URL_EXPORT bool ResolveRelativeURL(	877 const url_parse::Parsed& base_parsed,

881 const char* base_url,	878 bool base_is_file,

882 const url_parse::Parsed& base_parsed,	879 const char16* relative_url,

883 bool base_is_file,	880 const url_parse::Component& relative_component,

884 const char16* relative_url,	881 CharsetConverter* query_converter,

885 const url_parse::Component& relative_component,	882 CanonOutput* output,

886 CharsetConverter* query_converter,	883 url_parse::Parsed* out_parsed);

887 CanonOutput* output,

888 url_parse::Parsed* out_parsed);

889	884

890 } // namespace url_canon	885 } // namespace url_canon

891	886

892 #endif // URL_URL_CANON_H_	887 #endif // URL_URL_CANON_H_

OLD	NEW

« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »