Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1311)

Side by Side Diff: trunk/src/url/url_canon.h

Issue 15848009: Revert 203025 "Make the copy of GURL in src/url buildable as a c..." (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef URL_URL_CANON_H_ 5 #ifndef URL_URL_CANON_H_
6 #define URL_URL_CANON_H_ 6 #define URL_URL_CANON_H_
7 7
8 #include <stdlib.h> 8 #include <stdlib.h>
9 #include <string.h> 9 #include <string.h>
10 10
11 #include "base/string16.h" 11 #include "base/string16.h"
12 #include "url/url_export.h"
13 #include "url/url_parse.h" 12 #include "url/url_parse.h"
14 13
15 namespace url_canon { 14 namespace url_canon {
16 15
17 // Canonicalizer output ------------------------------------------------------- 16 // Canonicalizer output -------------------------------------------------------
18 17
19 // Base class for the canonicalizer output, this maintains a buffer and 18 // Base class for the canonicalizer output, this maintains a buffer and
20 // supports simple resizing and append operations on it. 19 // supports simple resizing and append operations on it.
21 // 20 //
22 // It is VERY IMPORTANT that no virtual function calls be made on the common 21 // It is VERY IMPORTANT that no virtual function calls be made on the common
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {}; 179 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};
181 180
182 // Character set converter ---------------------------------------------------- 181 // Character set converter ----------------------------------------------------
183 // 182 //
184 // Converts query strings into a custom encoding. The embedder can supply an 183 // Converts query strings into a custom encoding. The embedder can supply an
185 // implementation of this class to interface with their own character set 184 // implementation of this class to interface with their own character set
186 // conversion libraries. 185 // conversion libraries.
187 // 186 //
188 // Embedders will want to see the unit test for the ICU version. 187 // Embedders will want to see the unit test for the ICU version.
189 188
190 class URL_EXPORT CharsetConverter { 189 class CharsetConverter {
191 public: 190 public:
192 CharsetConverter() {} 191 CharsetConverter() {}
193 virtual ~CharsetConverter() {} 192 virtual ~CharsetConverter() {}
194 193
195 // Converts the given input string from UTF-16 to whatever output format the 194 // Converts the given input string from UTF-16 to whatever output format the
196 // converter supports. This is used only for the query encoding conversion, 195 // converter supports. This is used only for the query encoding conversion,
197 // which does not fail. Instead, the converter should insert "invalid 196 // which does not fail. Instead, the converter should insert "invalid
198 // character" characters in the output for invalid sequences, and do the 197 // character" characters in the output for invalid sequences, and do the
199 // best it can. 198 // best it can.
200 // 199 //
(...skipping 17 matching lines...) Expand all
218 // This should be called before parsing if whitespace removal is desired (which 217 // This should be called before parsing if whitespace removal is desired (which
219 // it normally is when you are canonicalizing). 218 // it normally is when you are canonicalizing).
220 // 219 //
221 // If no whitespace is removed, this function will not use the buffer and will 220 // If no whitespace is removed, this function will not use the buffer and will
222 // return a pointer to the input, to avoid the extra copy. If modification is 221 // return a pointer to the input, to avoid the extra copy. If modification is
223 // required, the given |buffer| will be used and the returned pointer will 222 // required, the given |buffer| will be used and the returned pointer will
224 // point to the beginning of the buffer. 223 // point to the beginning of the buffer.
225 // 224 //
226 // Therefore, callers should not use the buffer, since it may actually be empty, 225 // Therefore, callers should not use the buffer, since it may actually be empty,
227 // use the computed pointer and |*output_len| instead. 226 // use the computed pointer and |*output_len| instead.
228 URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len, 227 const char* RemoveURLWhitespace(const char* input, int input_len,
229 CanonOutputT<char>* buffer, 228 CanonOutputT<char>* buffer,
230 int* output_len); 229 int* output_len);
231 URL_EXPORT const char16* RemoveURLWhitespace(const char16* input, int input_len, 230 const char16* RemoveURLWhitespace(const char16* input, int input_len,
232 CanonOutputT<char16>* buffer, 231 CanonOutputT<char16>* buffer,
233 int* output_len); 232 int* output_len);
234 233
235 // IDN ------------------------------------------------------------------------ 234 // IDN ------------------------------------------------------------------------
236 235
237 // Converts the Unicode input representing a hostname to ASCII using IDN rules. 236 // Converts the Unicode input representing a hostname to ASCII using IDN rules.
238 // The output must fall in the ASCII range, but will be encoded in UTF-16. 237 // The output must fall in the ASCII range, but will be encoded in UTF-16.
239 // 238 //
240 // On success, the output will be filled with the ASCII host name and it will 239 // On success, the output will be filled with the ASCII host name and it will
241 // return true. Unlike most other canonicalization functions, this assumes that 240 // return true. Unlike most other canonicalization functions, this assumes that
242 // the output is empty. The beginning of the host will be at offset 0, and 241 // the output is empty. The beginning of the host will be at offset 0, and
243 // the length of the output will be set to the length of the new host name. 242 // the length of the output will be set to the length of the new host name.
244 // 243 //
245 // On error, returns false. The output in this case is undefined. 244 // On error, returns false. The output in this case is undefined.
246 URL_EXPORT bool IDNToASCII(const char16* src, 245 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
247 int src_len,
248 CanonOutputW* output);
249 246
250 // Piece-by-piece canonicalizers ---------------------------------------------- 247 // Piece-by-piece canonicalizers ----------------------------------------------
251 // 248 //
252 // These individual canonicalizers append the canonicalized versions of the 249 // These individual canonicalizers append the canonicalized versions of the
253 // corresponding URL component to the given std::string. The spec and the 250 // corresponding URL component to the given std::string. The spec and the
254 // previously-identified range of that component are the input. The range of 251 // previously-identified range of that component are the input. The range of
255 // the canonicalized component will be written to the output component. 252 // the canonicalized component will be written to the output component.
256 // 253 //
257 // These functions all append to the output so they can be chained. Make sure 254 // These functions all append to the output so they can be chained. Make sure
258 // the output is empty when you start. 255 // the output is empty when you start.
259 // 256 //
260 // These functions return boolean values indicating success. On failure, they 257 // These functions return boolean values indicating success. On failure, they
261 // will attempt to write something reasonable to the output so that, if 258 // will attempt to write something reasonable to the output so that, if
262 // displayed to the user, they will recognise it as something that's messed up. 259 // displayed to the user, they will recognise it as something that's messed up.
263 // Nothing more should ever be done with these invalid URLs, however. 260 // Nothing more should ever be done with these invalid URLs, however.
264 261
265 // Scheme: Appends the scheme and colon to the URL. The output component will 262 // Scheme: Appends the scheme and colon to the URL. The output component will
266 // indicate the range of characters up to but not including the colon. 263 // indicate the range of characters up to but not including the colon.
267 // 264 //
268 // Canonical URLs always have a scheme. If the scheme is not present in the 265 // Canonical URLs always have a scheme. If the scheme is not present in the
269 // input, this will just write the colon to indicate an empty scheme. Does not 266 // input, this will just write the colon to indicate an empty scheme. Does not
270 // append slashes which will be needed before any authority components for most 267 // append slashes which will be needed before any authority components for most
271 // URLs. 268 // URLs.
272 // 269 //
273 // The 8-bit version requires UTF-8 encoding. 270 // The 8-bit version requires UTF-8 encoding.
274 URL_EXPORT bool CanonicalizeScheme(const char* spec, 271 bool CanonicalizeScheme(const char* spec,
275 const url_parse::Component& scheme, 272 const url_parse::Component& scheme,
276 CanonOutput* output, 273 CanonOutput* output,
277 url_parse::Component* out_scheme); 274 url_parse::Component* out_scheme);
278 URL_EXPORT bool CanonicalizeScheme(const char16* spec, 275 bool CanonicalizeScheme(const char16* spec,
279 const url_parse::Component& scheme, 276 const url_parse::Component& scheme,
280 CanonOutput* output, 277 CanonOutput* output,
281 url_parse::Component* out_scheme); 278 url_parse::Component* out_scheme);
282 279
283 // User info: username/password. If present, this will add the delimiters so 280 // User info: username/password. If present, this will add the delimiters so
284 // the output will be "<username>:<password>@" or "<username>@". Empty 281 // the output will be "<username>:<password>@" or "<username>@". Empty
285 // username/password pairs, or empty passwords, will get converted to 282 // username/password pairs, or empty passwords, will get converted to
286 // nonexistent in the canonical version. 283 // nonexistent in the canonical version.
287 // 284 //
288 // The components for the username and password refer to ranges in the 285 // The components for the username and password refer to ranges in the
289 // respective source strings. Usually, these will be the same string, which 286 // respective source strings. Usually, these will be the same string, which
290 // is legal as long as the two components don't overlap. 287 // is legal as long as the two components don't overlap.
291 // 288 //
292 // The 8-bit version requires UTF-8 encoding. 289 // The 8-bit version requires UTF-8 encoding.
293 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source, 290 bool CanonicalizeUserInfo(const char* username_source,
294 const url_parse::Component& username, 291 const url_parse::Component& username,
295 const char* password_source, 292 const char* password_source,
296 const url_parse::Component& password, 293 const url_parse::Component& password,
297 CanonOutput* output, 294 CanonOutput* output,
298 url_parse::Component* out_username, 295 url_parse::Component* out_username,
299 url_parse::Component* out_password); 296 url_parse::Component* out_password);
300 URL_EXPORT bool CanonicalizeUserInfo(const char16* username_source, 297 bool CanonicalizeUserInfo(const char16* username_source,
301 const url_parse::Component& username, 298 const url_parse::Component& username,
302 const char16* password_source, 299 const char16* password_source,
303 const url_parse::Component& password, 300 const url_parse::Component& password,
304 CanonOutput* output, 301 CanonOutput* output,
305 url_parse::Component* out_username, 302 url_parse::Component* out_username,
306 url_parse::Component* out_password); 303 url_parse::Component* out_password);
307 304
308 305
309 // This structure holds detailed state exported from the IP/Host canonicalizers. 306 // This structure holds detailed state exported from the IP/Host canonicalizers.
310 // Additional fields may be added as callers require them. 307 // Additional fields may be added as callers require them.
311 struct CanonHostInfo { 308 struct CanonHostInfo {
312 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} 309 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
313 310
314 // Convenience function to test if family is an IP address. 311 // Convenience function to test if family is an IP address.
315 bool IsIPAddress() const { return family == IPV4 || family == IPV6; } 312 bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
316 313
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
349 int AddressLength() const { 346 int AddressLength() const {
350 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0); 347 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
351 } 348 }
352 }; 349 };
353 350
354 351
355 // Host. 352 // Host.
356 // 353 //
357 // The 8-bit version requires UTF-8 encoding. Use this version when you only 354 // The 8-bit version requires UTF-8 encoding. Use this version when you only
358 // need to know whether canonicalization succeeded. 355 // need to know whether canonicalization succeeded.
359 URL_EXPORT bool CanonicalizeHost(const char* spec, 356 bool CanonicalizeHost(const char* spec,
360 const url_parse::Component& host, 357 const url_parse::Component& host,
361 CanonOutput* output, 358 CanonOutput* output,
362 url_parse::Component* out_host); 359 url_parse::Component* out_host);
363 URL_EXPORT bool CanonicalizeHost(const char16* spec, 360 bool CanonicalizeHost(const char16* spec,
364 const url_parse::Component& host, 361 const url_parse::Component& host,
365 CanonOutput* output, 362 CanonOutput* output,
366 url_parse::Component* out_host); 363 url_parse::Component* out_host);
367 364
368 // Extended version of CanonicalizeHost, which returns additional information. 365 // Extended version of CanonicalizeHost, which returns additional information.
369 // Use this when you need to know whether the hostname was an IP address. 366 // Use this when you need to know whether the hostname was an IP address.
370 // A successful return is indicated by host_info->family != BROKEN. See the 367 // A successful return is indicated by host_info->family != BROKEN. See the
371 // definition of CanonHostInfo above for details. 368 // definition of CanonHostInfo above for details.
372 URL_EXPORT void CanonicalizeHostVerbose(const char* spec, 369 void CanonicalizeHostVerbose(const char* spec,
373 const url_parse::Component& host, 370 const url_parse::Component& host,
374 CanonOutput* output, 371 CanonOutput* output,
375 CanonHostInfo* host_info); 372 CanonHostInfo* host_info);
376 URL_EXPORT void CanonicalizeHostVerbose(const char16* spec, 373 void CanonicalizeHostVerbose(const char16* spec,
377 const url_parse::Component& host, 374 const url_parse::Component& host,
378 CanonOutput* output, 375 CanonOutput* output,
379 CanonHostInfo* host_info); 376 CanonHostInfo* host_info);
380 377
381 378
382 // IP addresses. 379 // IP addresses.
383 // 380 //
384 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is 381 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
385 // an IP address, it will canonicalize it as such, appending it to |output|. 382 // an IP address, it will canonicalize it as such, appending it to |output|.
386 // Additional status information is returned via the |*host_info| parameter. 383 // Additional status information is returned via the |*host_info| parameter.
387 // See the definition of CanonHostInfo above for details. 384 // See the definition of CanonHostInfo above for details.
388 // 385 //
389 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that 386 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
390 // the input is unescaped and name-prepped, etc. It should not normally be 387 // the input is unescaped and name-prepped, etc. It should not normally be
391 // necessary or wise to call this directly. 388 // necessary or wise to call this directly.
392 URL_EXPORT void CanonicalizeIPAddress(const char* spec, 389 void CanonicalizeIPAddress(const char* spec,
393 const url_parse::Component& host, 390 const url_parse::Component& host,
394 CanonOutput* output, 391 CanonOutput* output,
395 CanonHostInfo* host_info); 392 CanonHostInfo* host_info);
396 URL_EXPORT void CanonicalizeIPAddress(const char16* spec, 393 void CanonicalizeIPAddress(const char16* spec,
397 const url_parse::Component& host, 394 const url_parse::Component& host,
398 CanonOutput* output, 395 CanonOutput* output,
399 CanonHostInfo* host_info); 396 CanonHostInfo* host_info);
400 397
401 // Port: this function will add the colon for the port if a port is present. 398 // Port: this function will add the colon for the port if a port is present.
402 // The caller can pass url_parse::PORT_UNSPECIFIED as the 399 // The caller can pass url_parse::PORT_UNSPECIFIED as the
403 // default_port_for_scheme argument if there is no default port. 400 // default_port_for_scheme argument if there is no default port.
404 // 401 //
405 // The 8-bit version requires UTF-8 encoding. 402 // The 8-bit version requires UTF-8 encoding.
406 URL_EXPORT bool CanonicalizePort(const char* spec, 403 bool CanonicalizePort(const char* spec,
407 const url_parse::Component& port, 404 const url_parse::Component& port,
408 int default_port_for_scheme, 405 int default_port_for_scheme,
409 CanonOutput* output, 406 CanonOutput* output,
410 url_parse::Component* out_port); 407 url_parse::Component* out_port);
411 URL_EXPORT bool CanonicalizePort(const char16* spec, 408 bool CanonicalizePort(const char16* spec,
412 const url_parse::Component& port, 409 const url_parse::Component& port,
413 int default_port_for_scheme, 410 int default_port_for_scheme,
414 CanonOutput* output, 411 CanonOutput* output,
415 url_parse::Component* out_port); 412 url_parse::Component* out_port);
416 413
417 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED 414 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
418 // if the scheme is unknown. 415 // if the scheme is unknown.
419 URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len); 416 int DefaultPortForScheme(const char* scheme, int scheme_len);
420 417
421 // Path. If the input does not begin in a slash (including if the input is 418 // Path. If the input does not begin in a slash (including if the input is
422 // empty), we'll prepend a slash to the path to make it canonical. 419 // empty), we'll prepend a slash to the path to make it canonical.
423 // 420 //
424 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity 421 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity
425 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid 422 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
426 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't 423 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
427 // an issue. Somebody giving us an 8-bit path is responsible for generating 424 // an issue. Somebody giving us an 8-bit path is responsible for generating
428 // the path that the server expects (we'll escape high-bit characters), so 425 // the path that the server expects (we'll escape high-bit characters), so
429 // if something is invalid, it's their problem. 426 // if something is invalid, it's their problem.
430 URL_EXPORT bool CanonicalizePath(const char* spec, 427 bool CanonicalizePath(const char* spec,
431 const url_parse::Component& path, 428 const url_parse::Component& path,
432 CanonOutput* output, 429 CanonOutput* output,
433 url_parse::Component* out_path); 430 url_parse::Component* out_path);
434 URL_EXPORT bool CanonicalizePath(const char16* spec, 431 bool CanonicalizePath(const char16* spec,
435 const url_parse::Component& path, 432 const url_parse::Component& path,
436 CanonOutput* output, 433 CanonOutput* output,
437 url_parse::Component* out_path); 434 url_parse::Component* out_path);
438 435
439 // Canonicalizes the input as a file path. This is like CanonicalizePath except 436 // Canonicalizes the input as a file path. This is like CanonicalizePath except
440 // that it also handles Windows drive specs. For example, the path can begin 437 // that it also handles Windows drive specs. For example, the path can begin
441 // with "c|\" and it will get properly canonicalized to "C:/". 438 // with "c|\" and it will get properly canonicalized to "C:/".
442 // The string will be appended to |*output| and |*out_path| will be updated. 439 // The string will be appended to |*output| and |*out_path| will be updated.
443 // 440 //
444 // The 8-bit version requires UTF-8 encoding. 441 // The 8-bit version requires UTF-8 encoding.
445 URL_EXPORT bool FileCanonicalizePath(const char* spec, 442 bool FileCanonicalizePath(const char* spec,
446 const url_parse::Component& path, 443 const url_parse::Component& path,
447 CanonOutput* output, 444 CanonOutput* output,
448 url_parse::Component* out_path); 445 url_parse::Component* out_path);
449 URL_EXPORT bool FileCanonicalizePath(const char16* spec, 446 bool FileCanonicalizePath(const char16* spec,
450 const url_parse::Component& path, 447 const url_parse::Component& path,
451 CanonOutput* output, 448 CanonOutput* output,
452 url_parse::Component* out_path); 449 url_parse::Component* out_path);
453 450
454 // Query: Prepends the ? if needed. 451 // Query: Prepends the ? if needed.
455 // 452 //
456 // The 8-bit version requires the input to be UTF-8 encoded. Incorrectly 453 // The 8-bit version requires the input to be UTF-8 encoded. Incorrectly
457 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode 454 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode
458 // "invalid character." This function can not fail, we always just try to do 455 // "invalid character." This function can not fail, we always just try to do
459 // our best for crazy input here since web pages can set it themselves. 456 // our best for crazy input here since web pages can set it themselves.
460 // 457 //
461 // This will convert the given input into the output encoding that the given 458 // This will convert the given input into the output encoding that the given
462 // character set converter object provides. The converter will only be called 459 // character set converter object provides. The converter will only be called
463 // if necessary, for ASCII input, no conversions are necessary. 460 // if necessary, for ASCII input, no conversions are necessary.
464 // 461 //
465 // The converter can be NULL. In this case, the output encoding will be UTF-8. 462 // The converter can be NULL. In this case, the output encoding will be UTF-8.
466 URL_EXPORT void CanonicalizeQuery(const char* spec, 463 void CanonicalizeQuery(const char* spec,
467 const url_parse::Component& query, 464 const url_parse::Component& query,
468 CharsetConverter* converter, 465 CharsetConverter* converter,
469 CanonOutput* output, 466 CanonOutput* output,
470 url_parse::Component* out_query); 467 url_parse::Component* out_query);
471 URL_EXPORT void CanonicalizeQuery(const char16* spec, 468 void CanonicalizeQuery(const char16* spec,
472 const url_parse::Component& query, 469 const url_parse::Component& query,
473 CharsetConverter* converter, 470 CharsetConverter* converter,
474 CanonOutput* output, 471 CanonOutput* output,
475 url_parse::Component* out_query); 472 url_parse::Component* out_query);
476 473
477 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only 474 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
478 // canonicalizer that does not produce ASCII output). The output is 475 // canonicalizer that does not produce ASCII output). The output is
479 // guaranteed to be valid UTF-8. 476 // guaranteed to be valid UTF-8.
480 // 477 //
481 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use 478 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
482 // the "Unicode replacement character" for the confusing bits and copy the rest. 479 // the "Unicode replacement character" for the confusing bits and copy the rest.
483 URL_EXPORT void CanonicalizeRef(const char* spec, 480 void CanonicalizeRef(const char* spec,
484 const url_parse::Component& path, 481 const url_parse::Component& path,
485 CanonOutput* output, 482 CanonOutput* output,
486 url_parse::Component* out_path); 483 url_parse::Component* out_path);
487 URL_EXPORT void CanonicalizeRef(const char16* spec, 484 void CanonicalizeRef(const char16* spec,
488 const url_parse::Component& path, 485 const url_parse::Component& path,
489 CanonOutput* output, 486 CanonOutput* output,
490 url_parse::Component* out_path); 487 url_parse::Component* out_path);
491 488
492 // Full canonicalizer --------------------------------------------------------- 489 // Full canonicalizer ---------------------------------------------------------
493 // 490 //
494 // These functions replace any string contents, rather than append as above. 491 // These functions replace any string contents, rather than append as above.
495 // See the above piece-by-piece functions for information specific to 492 // See the above piece-by-piece functions for information specific to
496 // canonicalizing individual components. 493 // canonicalizing individual components.
497 // 494 //
498 // The output will be ASCII except the reference fragment, which may be UTF-8. 495 // The output will be ASCII except the reference fragment, which may be UTF-8.
499 // 496 //
500 // The 8-bit versions require UTF-8 encoding. 497 // The 8-bit versions require UTF-8 encoding.
501 498
502 // Use for standard URLs with authorities and paths. 499 // Use for standard URLs with authorities and paths.
503 URL_EXPORT bool CanonicalizeStandardURL(const char* spec, 500 bool CanonicalizeStandardURL(const char* spec,
504 int spec_len, 501 int spec_len,
505 const url_parse::Parsed& parsed, 502 const url_parse::Parsed& parsed,
506 CharsetConverter* query_converter, 503 CharsetConverter* query_converter,
507 CanonOutput* output, 504 CanonOutput* output,
508 url_parse::Parsed* new_parsed); 505 url_parse::Parsed* new_parsed);
509 URL_EXPORT bool CanonicalizeStandardURL(const char16* spec, 506 bool CanonicalizeStandardURL(const char16* spec,
510 int spec_len, 507 int spec_len,
511 const url_parse::Parsed& parsed, 508 const url_parse::Parsed& parsed,
512 CharsetConverter* query_converter, 509 CharsetConverter* query_converter,
513 CanonOutput* output, 510 CanonOutput* output,
514 url_parse::Parsed* new_parsed); 511 url_parse::Parsed* new_parsed);
515 512
516 // Use for file URLs. 513 // Use for file URLs.
517 URL_EXPORT bool CanonicalizeFileURL(const char* spec, 514 bool CanonicalizeFileURL(const char* spec,
518 int spec_len, 515 int spec_len,
519 const url_parse::Parsed& parsed, 516 const url_parse::Parsed& parsed,
520 CharsetConverter* query_converter, 517 CharsetConverter* query_converter,
521 CanonOutput* output, 518 CanonOutput* output,
522 url_parse::Parsed* new_parsed); 519 url_parse::Parsed* new_parsed);
523 URL_EXPORT bool CanonicalizeFileURL(const char16* spec, 520 bool CanonicalizeFileURL(const char16* spec,
524 int spec_len, 521 int spec_len,
525 const url_parse::Parsed& parsed, 522 const url_parse::Parsed& parsed,
526 CharsetConverter* query_converter, 523 CharsetConverter* query_converter,
527 CanonOutput* output, 524 CanonOutput* output,
528 url_parse::Parsed* new_parsed); 525 url_parse::Parsed* new_parsed);
529 526
530 // Use for filesystem URLs. 527 // Use for filesystem URLs.
531 URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec, 528 bool CanonicalizeFileSystemURL(const char* spec,
532 int spec_len, 529 int spec_len,
533 const url_parse::Parsed& parsed, 530 const url_parse::Parsed& parsed,
534 CharsetConverter* query_converter, 531 CharsetConverter* query_converter,
535 CanonOutput* output, 532 CanonOutput* output,
536 url_parse::Parsed* new_parsed); 533 url_parse::Parsed* new_parsed);
537 URL_EXPORT bool CanonicalizeFileSystemURL(const char16* spec, 534 bool CanonicalizeFileSystemURL(const char16* spec,
538 int spec_len, 535 int spec_len,
539 const url_parse::Parsed& parsed, 536 const url_parse::Parsed& parsed,
540 CharsetConverter* query_converter, 537 CharsetConverter* query_converter,
541 CanonOutput* output, 538 CanonOutput* output,
542 url_parse::Parsed* new_parsed); 539 url_parse::Parsed* new_parsed);
543 540
544 // Use for path URLs such as javascript. This does not modify the path in any 541 // Use for path URLs such as javascript. This does not modify the path in any
545 // way, for example, by escaping it. 542 // way, for example, by escaping it.
546 URL_EXPORT bool CanonicalizePathURL(const char* spec, 543 bool CanonicalizePathURL(const char* spec,
547 int spec_len, 544 int spec_len,
548 const url_parse::Parsed& parsed, 545 const url_parse::Parsed& parsed,
549 CanonOutput* output, 546 CanonOutput* output,
550 url_parse::Parsed* new_parsed); 547 url_parse::Parsed* new_parsed);
551 URL_EXPORT bool CanonicalizePathURL(const char16* spec, 548 bool CanonicalizePathURL(const char16* spec,
552 int spec_len, 549 int spec_len,
553 const url_parse::Parsed& parsed, 550 const url_parse::Parsed& parsed,
554 CanonOutput* output, 551 CanonOutput* output,
555 url_parse::Parsed* new_parsed); 552 url_parse::Parsed* new_parsed);
556 553
557 // Use for mailto URLs. This "canonicalizes" the url into a path and query 554 // Use for mailto URLs. This "canonicalizes" the url into a path and query
558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for 555 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
559 // the query encoding if there is a query. This is because a mailto URL is 556 // the query encoding if there is a query. This is because a mailto URL is
560 // really intended for an external mail program, and the encoding of a page, 557 // really intended for an external mail program, and the encoding of a page,
561 // etc. which would influence a query encoding normally are irrelevant. 558 // etc. which would influence a query encoding normally are irrelevant.
562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec, 559 bool CanonicalizeMailtoURL(const char* spec,
563 int spec_len, 560 int spec_len,
564 const url_parse::Parsed& parsed, 561 const url_parse::Parsed& parsed,
565 CanonOutput* output, 562 CanonOutput* output,
566 url_parse::Parsed* new_parsed); 563 url_parse::Parsed* new_parsed);
567 URL_EXPORT bool CanonicalizeMailtoURL(const char16* spec, 564 bool CanonicalizeMailtoURL(const char16* spec,
568 int spec_len, 565 int spec_len,
569 const url_parse::Parsed& parsed, 566 const url_parse::Parsed& parsed,
570 CanonOutput* output, 567 CanonOutput* output,
571 url_parse::Parsed* new_parsed); 568 url_parse::Parsed* new_parsed);
572 569
573 // Part replacer -------------------------------------------------------------- 570 // Part replacer --------------------------------------------------------------
574 571
575 // Internal structure used for storing separate strings for each component. 572 // Internal structure used for storing separate strings for each component.
576 // The basic canonicalization functions use this structure internally so that 573 // The basic canonicalization functions use this structure internally so that
577 // component replacement (different strings for different components) can be 574 // component replacement (different strings for different components) can be
578 // treated on the same code path as regular canonicalization (the same string 575 // treated on the same code path as regular canonicalization (the same string
579 // for each component). 576 // for each component).
580 // 577 //
581 // A url_parse::Parsed structure usually goes along with this. Those 578 // A url_parse::Parsed structure usually goes along with this. Those
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
746 // Replace component | (replacement string) (replacement component) 743 // Replace component | (replacement string) (replacement component)
747 // Delete component | (non-NULL) (invalid component: (0,-1)) 744 // Delete component | (non-NULL) (invalid component: (0,-1))
748 // 745 //
749 // We use a pointer to the empty string for the source when the component 746 // We use a pointer to the empty string for the source when the component
750 // should be deleted. 747 // should be deleted.
751 URLComponentSource<CHAR> sources_; 748 URLComponentSource<CHAR> sources_;
752 url_parse::Parsed components_; 749 url_parse::Parsed components_;
753 }; 750 };
754 751
755 // The base must be an 8-bit canonical URL. 752 // The base must be an 8-bit canonical URL.
756 URL_EXPORT bool ReplaceStandardURL(const char* base, 753 bool ReplaceStandardURL(const char* base,
757 const url_parse::Parsed& base_parsed, 754 const url_parse::Parsed& base_parsed,
758 const Replacements<char>& replacements, 755 const Replacements<char>& replacements,
759 CharsetConverter* query_converter, 756 CharsetConverter* query_converter,
760 CanonOutput* output, 757 CanonOutput* output,
761 url_parse::Parsed* new_parsed); 758 url_parse::Parsed* new_parsed);
762 URL_EXPORT bool ReplaceStandardURL(const char* base, 759 bool ReplaceStandardURL(const char* base,
763 const url_parse::Parsed& base_parsed, 760 const url_parse::Parsed& base_parsed,
764 const Replacements<char16>& replacements, 761 const Replacements<char16>& replacements,
765 CharsetConverter* query_converter, 762 CharsetConverter* query_converter,
766 CanonOutput* output, 763 CanonOutput* output,
767 url_parse::Parsed* new_parsed); 764 url_parse::Parsed* new_parsed);
768 765
769 // Filesystem URLs can only have the path, query, or ref replaced. 766 // Filesystem URLs can only have the path, query, or ref replaced.
770 // All other components will be ignored. 767 // All other components will be ignored.
771 URL_EXPORT bool ReplaceFileSystemURL(const char* base, 768 bool ReplaceFileSystemURL(const char* base,
772 const url_parse::Parsed& base_parsed, 769 const url_parse::Parsed& base_parsed,
773 const Replacements<char>& replacements, 770 const Replacements<char>& replacements,
774 CharsetConverter* query_converter, 771 CharsetConverter* query_converter,
775 CanonOutput* output, 772 CanonOutput* output,
776 url_parse::Parsed* new_parsed); 773 url_parse::Parsed* new_parsed);
777 URL_EXPORT bool ReplaceFileSystemURL(const char* base, 774 bool ReplaceFileSystemURL(const char* base,
778 const url_parse::Parsed& base_parsed, 775 const url_parse::Parsed& base_parsed,
779 const Replacements<char16>& replacements, 776 const Replacements<char16>& replacements,
780 CharsetConverter* query_converter, 777 CharsetConverter* query_converter,
781 CanonOutput* output, 778 CanonOutput* output,
782 url_parse::Parsed* new_parsed); 779 url_parse::Parsed* new_parsed);
783 780
784 // Replacing some parts of a file URL is not permitted. Everything except 781 // Replacing some parts of a file URL is not permitted. Everything except
785 // the host, path, query, and ref will be ignored. 782 // the host, path, query, and ref will be ignored.
786 URL_EXPORT bool ReplaceFileURL(const char* base, 783 bool ReplaceFileURL(const char* base,
787 const url_parse::Parsed& base_parsed, 784 const url_parse::Parsed& base_parsed,
788 const Replacements<char>& replacements, 785 const Replacements<char>& replacements,
789 CharsetConverter* query_converter, 786 CharsetConverter* query_converter,
790 CanonOutput* output, 787 CanonOutput* output,
791 url_parse::Parsed* new_parsed); 788 url_parse::Parsed* new_parsed);
792 URL_EXPORT bool ReplaceFileURL(const char* base, 789 bool ReplaceFileURL(const char* base,
793 const url_parse::Parsed& base_parsed, 790 const url_parse::Parsed& base_parsed,
794 const Replacements<char16>& replacements, 791 const Replacements<char16>& replacements,
795 CharsetConverter* query_converter, 792 CharsetConverter* query_converter,
796 CanonOutput* output, 793 CanonOutput* output,
797 url_parse::Parsed* new_parsed); 794 url_parse::Parsed* new_parsed);
798 795
799 // Path URLs can only have the scheme and path replaced. All other components 796 // Path URLs can only have the scheme and path replaced. All other components
800 // will be ignored. 797 // will be ignored.
801 URL_EXPORT bool ReplacePathURL(const char* base, 798 bool ReplacePathURL(const char* base,
802 const url_parse::Parsed& base_parsed, 799 const url_parse::Parsed& base_parsed,
803 const Replacements<char>& replacements, 800 const Replacements<char>& replacements,
804 CanonOutput* output, 801 CanonOutput* output,
805 url_parse::Parsed* new_parsed); 802 url_parse::Parsed* new_parsed);
806 URL_EXPORT bool ReplacePathURL(const char* base, 803 bool ReplacePathURL(const char* base,
807 const url_parse::Parsed& base_parsed, 804 const url_parse::Parsed& base_parsed,
808 const Replacements<char16>& replacements, 805 const Replacements<char16>& replacements,
809 CanonOutput* output, 806 CanonOutput* output,
810 url_parse::Parsed* new_parsed); 807 url_parse::Parsed* new_parsed);
811 808
812 // Mailto URLs can only have the scheme, path, and query replaced. 809 // Mailto URLs can only have the scheme, path, and query replaced.
813 // All other components will be ignored. 810 // All other components will be ignored.
814 URL_EXPORT bool ReplaceMailtoURL(const char* base, 811 bool ReplaceMailtoURL(const char* base,
815 const url_parse::Parsed& base_parsed, 812 const url_parse::Parsed& base_parsed,
816 const Replacements<char>& replacements, 813 const Replacements<char>& replacements,
817 CanonOutput* output, 814 CanonOutput* output,
818 url_parse::Parsed* new_parsed); 815 url_parse::Parsed* new_parsed);
819 URL_EXPORT bool ReplaceMailtoURL(const char* base, 816 bool ReplaceMailtoURL(const char* base,
820 const url_parse::Parsed& base_parsed, 817 const url_parse::Parsed& base_parsed,
821 const Replacements<char16>& replacements, 818 const Replacements<char16>& replacements,
822 CanonOutput* output, 819 CanonOutput* output,
823 url_parse::Parsed* new_parsed); 820 url_parse::Parsed* new_parsed);
824 821
825 // Relative URL --------------------------------------------------------------- 822 // Relative URL ---------------------------------------------------------------
826 823
827 // Given an input URL or URL fragment |fragment|, determines if it is a 824 // Given an input URL or URL fragment |fragment|, determines if it is a
828 // relative or absolute URL and places the result into |*is_relative|. If it is 825 // relative or absolute URL and places the result into |*is_relative|. If it is
829 // relative, the relevant portion of the URL will be placed into 826 // relative, the relevant portion of the URL will be placed into
830 // |*relative_component| (there may have been trimmed whitespace, for example). 827 // |*relative_component| (there may have been trimmed whitespace, for example).
831 // This value is passed to ResolveRelativeURL. If the input is not relative, 828 // This value is passed to ResolveRelativeURL. If the input is not relative,
832 // this value is UNDEFINED (it may be changed by the function). 829 // this value is UNDEFINED (it may be changed by the function).
833 // 830 //
834 // Returns true on success (we successfully determined the URL is relative or 831 // Returns true on success (we successfully determined the URL is relative or
835 // not). Failure means that the combination of URLs doesn't make any sense. 832 // not). Failure means that the combination of URLs doesn't make any sense.
836 // 833 //
837 // The base URL should always be canonical, therefore is ASCII. 834 // The base URL should always be canonical, therefore is ASCII.
838 URL_EXPORT bool IsRelativeURL(const char* base, 835 bool IsRelativeURL(const char* base,
839 const url_parse::Parsed& base_parsed, 836 const url_parse::Parsed& base_parsed,
840 const char* fragment, 837 const char* fragment,
841 int fragment_len, 838 int fragment_len,
842 bool is_base_hierarchical, 839 bool is_base_hierarchical,
843 bool* is_relative, 840 bool* is_relative,
844 url_parse::Component* relative_component); 841 url_parse::Component* relative_component);
845 URL_EXPORT bool IsRelativeURL(const char* base, 842 bool IsRelativeURL(const char* base,
846 const url_parse::Parsed& base_parsed, 843 const url_parse::Parsed& base_parsed,
847 const char16* fragment, 844 const char16* fragment,
848 int fragment_len, 845 int fragment_len,
849 bool is_base_hierarchical, 846 bool is_base_hierarchical,
850 bool* is_relative, 847 bool* is_relative,
851 url_parse::Component* relative_component); 848 url_parse::Component* relative_component);
852 849
853 // Given a canonical parsed source URL, a URL fragment known to be relative, 850 // Given a canonical parsed source URL, a URL fragment known to be relative,
854 // and the identified relevant portion of the relative URL (computed by 851 // and the identified relevant portion of the relative URL (computed by
855 // IsRelativeURL), this produces a new parsed canonical URL in |output| and 852 // IsRelativeURL), this produces a new parsed canonical URL in |output| and
856 // |out_parsed|. 853 // |out_parsed|.
857 // 854 //
858 // It also requires a flag indicating whether the base URL is a file: URL 855 // It also requires a flag indicating whether the base URL is a file: URL
859 // which triggers additional logic. 856 // which triggers additional logic.
860 // 857 //
861 // The base URL should be canonical and have a host (may be empty for file 858 // The base URL should be canonical and have a host (may be empty for file
862 // URLs) and a path. If it doesn't have these, we can't resolve relative 859 // URLs) and a path. If it doesn't have these, we can't resolve relative
863 // URLs off of it and will return the base as the output with an error flag. 860 // URLs off of it and will return the base as the output with an error flag.
864 // Because it is canonical, it should also be ASCII. 861 // Because it is canonical, it should also be ASCII.
865 // 862 //
866 // The query charset converter follows the same rules as CanonicalizeQuery. 863 // The query charset converter follows the same rules as CanonicalizeQuery.
867 // 864 //
868 // Returns true on success. On failure, the output will be "something 865 // Returns true on success. On failure, the output will be "something
869 // reasonable" that will be consistent and valid, just probably not what 866 // reasonable" that will be consistent and valid, just probably not what
870 // was intended by the web page author or caller. 867 // was intended by the web page author or caller.
871 URL_EXPORT bool ResolveRelativeURL( 868 bool ResolveRelativeURL(const char* base_url,
872 const char* base_url, 869 const url_parse::Parsed& base_parsed,
873 const url_parse::Parsed& base_parsed, 870 bool base_is_file,
874 bool base_is_file, 871 const char* relative_url,
875 const char* relative_url, 872 const url_parse::Component& relative_component,
876 const url_parse::Component& relative_component, 873 CharsetConverter* query_converter,
877 CharsetConverter* query_converter, 874 CanonOutput* output,
878 CanonOutput* output, 875 url_parse::Parsed* out_parsed);
879 url_parse::Parsed* out_parsed); 876 bool ResolveRelativeURL(const char* base_url,
880 URL_EXPORT bool ResolveRelativeURL( 877 const url_parse::Parsed& base_parsed,
881 const char* base_url, 878 bool base_is_file,
882 const url_parse::Parsed& base_parsed, 879 const char16* relative_url,
883 bool base_is_file, 880 const url_parse::Component& relative_component,
884 const char16* relative_url, 881 CharsetConverter* query_converter,
885 const url_parse::Component& relative_component, 882 CanonOutput* output,
886 CharsetConverter* query_converter, 883 url_parse::Parsed* out_parsed);
887 CanonOutput* output,
888 url_parse::Parsed* out_parsed);
889 884
890 } // namespace url_canon 885 } // namespace url_canon
891 886
892 #endif // URL_URL_CANON_H_ 887 #endif // URL_URL_CANON_H_
OLDNEW
« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698