Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: trunk/src/url/url_canon.h

Issue 15799007: Revert 203027 "Revert 203025 "Make the copy of GURL in src/url b..." (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef URL_URL_CANON_H_ 5 #ifndef URL_URL_CANON_H_
6 #define URL_URL_CANON_H_ 6 #define URL_URL_CANON_H_
7 7
8 #include <stdlib.h> 8 #include <stdlib.h>
9 #include <string.h> 9 #include <string.h>
10 10
11 #include "base/string16.h" 11 #include "base/string16.h"
12 #include "url/url_export.h"
12 #include "url/url_parse.h" 13 #include "url/url_parse.h"
13 14
14 namespace url_canon { 15 namespace url_canon {
15 16
16 // Canonicalizer output ------------------------------------------------------- 17 // Canonicalizer output -------------------------------------------------------
17 18
18 // Base class for the canonicalizer output, this maintains a buffer and 19 // Base class for the canonicalizer output, this maintains a buffer and
19 // supports simple resizing and append operations on it. 20 // supports simple resizing and append operations on it.
20 // 21 //
21 // It is VERY IMPORTANT that no virtual function calls be made on the common 22 // It is VERY IMPORTANT that no virtual function calls be made on the common
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {}; 180 class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};
180 181
181 // Character set converter ---------------------------------------------------- 182 // Character set converter ----------------------------------------------------
182 // 183 //
183 // Converts query strings into a custom encoding. The embedder can supply an 184 // Converts query strings into a custom encoding. The embedder can supply an
184 // implementation of this class to interface with their own character set 185 // implementation of this class to interface with their own character set
185 // conversion libraries. 186 // conversion libraries.
186 // 187 //
187 // Embedders will want to see the unit test for the ICU version. 188 // Embedders will want to see the unit test for the ICU version.
188 189
189 class CharsetConverter { 190 class URL_EXPORT CharsetConverter {
190 public: 191 public:
191 CharsetConverter() {} 192 CharsetConverter() {}
192 virtual ~CharsetConverter() {} 193 virtual ~CharsetConverter() {}
193 194
194 // Converts the given input string from UTF-16 to whatever output format the 195 // Converts the given input string from UTF-16 to whatever output format the
195 // converter supports. This is used only for the query encoding conversion, 196 // converter supports. This is used only for the query encoding conversion,
196 // which does not fail. Instead, the converter should insert "invalid 197 // which does not fail. Instead, the converter should insert "invalid
197 // character" characters in the output for invalid sequences, and do the 198 // character" characters in the output for invalid sequences, and do the
198 // best it can. 199 // best it can.
199 // 200 //
(...skipping 17 matching lines...) Expand all
217 // This should be called before parsing if whitespace removal is desired (which 218 // This should be called before parsing if whitespace removal is desired (which
218 // it normally is when you are canonicalizing). 219 // it normally is when you are canonicalizing).
219 // 220 //
220 // If no whitespace is removed, this function will not use the buffer and will 221 // If no whitespace is removed, this function will not use the buffer and will
221 // return a pointer to the input, to avoid the extra copy. If modification is 222 // return a pointer to the input, to avoid the extra copy. If modification is
222 // required, the given |buffer| will be used and the returned pointer will 223 // required, the given |buffer| will be used and the returned pointer will
223 // point to the beginning of the buffer. 224 // point to the beginning of the buffer.
224 // 225 //
225 // Therefore, callers should not use the buffer, since it may actually be empty, 226 // Therefore, callers should not use the buffer, since it may actually be empty,
226 // use the computed pointer and |*output_len| instead. 227 // use the computed pointer and |*output_len| instead.
227 const char* RemoveURLWhitespace(const char* input, int input_len, 228 URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len,
228 CanonOutputT<char>* buffer, 229 CanonOutputT<char>* buffer,
229 int* output_len); 230 int* output_len);
230 const char16* RemoveURLWhitespace(const char16* input, int input_len, 231 URL_EXPORT const char16* RemoveURLWhitespace(const char16* input, int input_len,
231 CanonOutputT<char16>* buffer, 232 CanonOutputT<char16>* buffer,
232 int* output_len); 233 int* output_len);
233 234
234 // IDN ------------------------------------------------------------------------ 235 // IDN ------------------------------------------------------------------------
235 236
236 // Converts the Unicode input representing a hostname to ASCII using IDN rules. 237 // Converts the Unicode input representing a hostname to ASCII using IDN rules.
237 // The output must fall in the ASCII range, but will be encoded in UTF-16. 238 // The output must fall in the ASCII range, but will be encoded in UTF-16.
238 // 239 //
239 // On success, the output will be filled with the ASCII host name and it will 240 // On success, the output will be filled with the ASCII host name and it will
240 // return true. Unlike most other canonicalization functions, this assumes that 241 // return true. Unlike most other canonicalization functions, this assumes that
241 // the output is empty. The beginning of the host will be at offset 0, and 242 // the output is empty. The beginning of the host will be at offset 0, and
242 // the length of the output will be set to the length of the new host name. 243 // the length of the output will be set to the length of the new host name.
243 // 244 //
244 // On error, returns false. The output in this case is undefined. 245 // On error, returns false. The output in this case is undefined.
245 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output); 246 URL_EXPORT bool IDNToASCII(const char16* src,
247 int src_len,
248 CanonOutputW* output);
246 249
247 // Piece-by-piece canonicalizers ---------------------------------------------- 250 // Piece-by-piece canonicalizers ----------------------------------------------
248 // 251 //
249 // These individual canonicalizers append the canonicalized versions of the 252 // These individual canonicalizers append the canonicalized versions of the
250 // corresponding URL component to the given std::string. The spec and the 253 // corresponding URL component to the given std::string. The spec and the
251 // previously-identified range of that component are the input. The range of 254 // previously-identified range of that component are the input. The range of
252 // the canonicalized component will be written to the output component. 255 // the canonicalized component will be written to the output component.
253 // 256 //
254 // These functions all append to the output so they can be chained. Make sure 257 // These functions all append to the output so they can be chained. Make sure
255 // the output is empty when you start. 258 // the output is empty when you start.
256 // 259 //
257 // These functions return boolean values indicating success. On failure, they 260 // These functions return boolean values indicating success. On failure, they
258 // will attempt to write something reasonable to the output so that, if 261 // will attempt to write something reasonable to the output so that, if
259 // displayed to the user, they will recognise it as something that's messed up. 262 // displayed to the user, they will recognise it as something that's messed up.
260 // Nothing more should ever be done with these invalid URLs, however. 263 // Nothing more should ever be done with these invalid URLs, however.
261 264
262 // Scheme: Appends the scheme and colon to the URL. The output component will 265 // Scheme: Appends the scheme and colon to the URL. The output component will
263 // indicate the range of characters up to but not including the colon. 266 // indicate the range of characters up to but not including the colon.
264 // 267 //
265 // Canonical URLs always have a scheme. If the scheme is not present in the 268 // Canonical URLs always have a scheme. If the scheme is not present in the
266 // input, this will just write the colon to indicate an empty scheme. Does not 269 // input, this will just write the colon to indicate an empty scheme. Does not
267 // append slashes which will be needed before any authority components for most 270 // append slashes which will be needed before any authority components for most
268 // URLs. 271 // URLs.
269 // 272 //
270 // The 8-bit version requires UTF-8 encoding. 273 // The 8-bit version requires UTF-8 encoding.
271 bool CanonicalizeScheme(const char* spec, 274 URL_EXPORT bool CanonicalizeScheme(const char* spec,
272 const url_parse::Component& scheme, 275 const url_parse::Component& scheme,
273 CanonOutput* output, 276 CanonOutput* output,
274 url_parse::Component* out_scheme); 277 url_parse::Component* out_scheme);
275 bool CanonicalizeScheme(const char16* spec, 278 URL_EXPORT bool CanonicalizeScheme(const char16* spec,
276 const url_parse::Component& scheme, 279 const url_parse::Component& scheme,
277 CanonOutput* output, 280 CanonOutput* output,
278 url_parse::Component* out_scheme); 281 url_parse::Component* out_scheme);
279 282
280 // User info: username/password. If present, this will add the delimiters so 283 // User info: username/password. If present, this will add the delimiters so
281 // the output will be "<username>:<password>@" or "<username>@". Empty 284 // the output will be "<username>:<password>@" or "<username>@". Empty
282 // username/password pairs, or empty passwords, will get converted to 285 // username/password pairs, or empty passwords, will get converted to
283 // nonexistent in the canonical version. 286 // nonexistent in the canonical version.
284 // 287 //
285 // The components for the username and password refer to ranges in the 288 // The components for the username and password refer to ranges in the
286 // respective source strings. Usually, these will be the same string, which 289 // respective source strings. Usually, these will be the same string, which
287 // is legal as long as the two components don't overlap. 290 // is legal as long as the two components don't overlap.
288 // 291 //
289 // The 8-bit version requires UTF-8 encoding. 292 // The 8-bit version requires UTF-8 encoding.
290 bool CanonicalizeUserInfo(const char* username_source, 293 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
291 const url_parse::Component& username, 294 const url_parse::Component& username,
292 const char* password_source, 295 const char* password_source,
293 const url_parse::Component& password, 296 const url_parse::Component& password,
294 CanonOutput* output, 297 CanonOutput* output,
295 url_parse::Component* out_username, 298 url_parse::Component* out_username,
296 url_parse::Component* out_password); 299 url_parse::Component* out_password);
297 bool CanonicalizeUserInfo(const char16* username_source, 300 URL_EXPORT bool CanonicalizeUserInfo(const char16* username_source,
298 const url_parse::Component& username, 301 const url_parse::Component& username,
299 const char16* password_source, 302 const char16* password_source,
300 const url_parse::Component& password, 303 const url_parse::Component& password,
301 CanonOutput* output, 304 CanonOutput* output,
302 url_parse::Component* out_username, 305 url_parse::Component* out_username,
303 url_parse::Component* out_password); 306 url_parse::Component* out_password);
304 307
305 308
306 // This structure holds detailed state exported from the IP/Host canonicalizers. 309 // This structure holds detailed state exported from the IP/Host canonicalizers.
307 // Additional fields may be added as callers require them. 310 // Additional fields may be added as callers require them.
308 struct CanonHostInfo { 311 struct CanonHostInfo {
309 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} 312 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
310 313
311 // Convenience function to test if family is an IP address. 314 // Convenience function to test if family is an IP address.
312 bool IsIPAddress() const { return family == IPV4 || family == IPV6; } 315 bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
313 316
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
346 int AddressLength() const { 349 int AddressLength() const {
347 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0); 350 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
348 } 351 }
349 }; 352 };
350 353
351 354
352 // Host. 355 // Host.
353 // 356 //
354 // The 8-bit version requires UTF-8 encoding. Use this version when you only 357 // The 8-bit version requires UTF-8 encoding. Use this version when you only
355 // need to know whether canonicalization succeeded. 358 // need to know whether canonicalization succeeded.
356 bool CanonicalizeHost(const char* spec, 359 URL_EXPORT bool CanonicalizeHost(const char* spec,
357 const url_parse::Component& host, 360 const url_parse::Component& host,
358 CanonOutput* output, 361 CanonOutput* output,
359 url_parse::Component* out_host); 362 url_parse::Component* out_host);
360 bool CanonicalizeHost(const char16* spec, 363 URL_EXPORT bool CanonicalizeHost(const char16* spec,
361 const url_parse::Component& host, 364 const url_parse::Component& host,
362 CanonOutput* output, 365 CanonOutput* output,
363 url_parse::Component* out_host); 366 url_parse::Component* out_host);
364 367
365 // Extended version of CanonicalizeHost, which returns additional information. 368 // Extended version of CanonicalizeHost, which returns additional information.
366 // Use this when you need to know whether the hostname was an IP address. 369 // Use this when you need to know whether the hostname was an IP address.
367 // A successful return is indicated by host_info->family != BROKEN. See the 370 // A successful return is indicated by host_info->family != BROKEN. See the
368 // definition of CanonHostInfo above for details. 371 // definition of CanonHostInfo above for details.
369 void CanonicalizeHostVerbose(const char* spec, 372 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
370 const url_parse::Component& host, 373 const url_parse::Component& host,
371 CanonOutput* output, 374 CanonOutput* output,
372 CanonHostInfo* host_info); 375 CanonHostInfo* host_info);
373 void CanonicalizeHostVerbose(const char16* spec, 376 URL_EXPORT void CanonicalizeHostVerbose(const char16* spec,
374 const url_parse::Component& host, 377 const url_parse::Component& host,
375 CanonOutput* output, 378 CanonOutput* output,
376 CanonHostInfo* host_info); 379 CanonHostInfo* host_info);
377 380
378 381
379 // IP addresses. 382 // IP addresses.
380 // 383 //
381 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is 384 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
382 // an IP address, it will canonicalize it as such, appending it to |output|. 385 // an IP address, it will canonicalize it as such, appending it to |output|.
383 // Additional status information is returned via the |*host_info| parameter. 386 // Additional status information is returned via the |*host_info| parameter.
384 // See the definition of CanonHostInfo above for details. 387 // See the definition of CanonHostInfo above for details.
385 // 388 //
386 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that 389 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
387 // the input is unescaped and name-prepped, etc. It should not normally be 390 // the input is unescaped and name-prepped, etc. It should not normally be
388 // necessary or wise to call this directly. 391 // necessary or wise to call this directly.
389 void CanonicalizeIPAddress(const char* spec, 392 URL_EXPORT void CanonicalizeIPAddress(const char* spec,
390 const url_parse::Component& host, 393 const url_parse::Component& host,
391 CanonOutput* output, 394 CanonOutput* output,
392 CanonHostInfo* host_info); 395 CanonHostInfo* host_info);
393 void CanonicalizeIPAddress(const char16* spec, 396 URL_EXPORT void CanonicalizeIPAddress(const char16* spec,
394 const url_parse::Component& host, 397 const url_parse::Component& host,
395 CanonOutput* output, 398 CanonOutput* output,
396 CanonHostInfo* host_info); 399 CanonHostInfo* host_info);
397 400
398 // Port: this function will add the colon for the port if a port is present. 401 // Port: this function will add the colon for the port if a port is present.
399 // The caller can pass url_parse::PORT_UNSPECIFIED as the 402 // The caller can pass url_parse::PORT_UNSPECIFIED as the
400 // default_port_for_scheme argument if there is no default port. 403 // default_port_for_scheme argument if there is no default port.
401 // 404 //
402 // The 8-bit version requires UTF-8 encoding. 405 // The 8-bit version requires UTF-8 encoding.
403 bool CanonicalizePort(const char* spec, 406 URL_EXPORT bool CanonicalizePort(const char* spec,
404 const url_parse::Component& port, 407 const url_parse::Component& port,
405 int default_port_for_scheme, 408 int default_port_for_scheme,
406 CanonOutput* output, 409 CanonOutput* output,
407 url_parse::Component* out_port); 410 url_parse::Component* out_port);
408 bool CanonicalizePort(const char16* spec, 411 URL_EXPORT bool CanonicalizePort(const char16* spec,
409 const url_parse::Component& port, 412 const url_parse::Component& port,
410 int default_port_for_scheme, 413 int default_port_for_scheme,
411 CanonOutput* output, 414 CanonOutput* output,
412 url_parse::Component* out_port); 415 url_parse::Component* out_port);
413 416
414 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED 417 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
415 // if the scheme is unknown. 418 // if the scheme is unknown.
416 int DefaultPortForScheme(const char* scheme, int scheme_len); 419 URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
417 420
418 // Path. If the input does not begin in a slash (including if the input is 421 // Path. If the input does not begin in a slash (including if the input is
419 // empty), we'll prepend a slash to the path to make it canonical. 422 // empty), we'll prepend a slash to the path to make it canonical.
420 // 423 //
421 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity 424 // The 8-bit version assumes UTF-8 encoding, but does not verify the validity
422 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid 425 // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
423 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't 426 // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
424 // an issue. Somebody giving us an 8-bit path is responsible for generating 427 // an issue. Somebody giving us an 8-bit path is responsible for generating
425 // the path that the server expects (we'll escape high-bit characters), so 428 // the path that the server expects (we'll escape high-bit characters), so
426 // if something is invalid, it's their problem. 429 // if something is invalid, it's their problem.
427 bool CanonicalizePath(const char* spec, 430 URL_EXPORT bool CanonicalizePath(const char* spec,
428 const url_parse::Component& path, 431 const url_parse::Component& path,
429 CanonOutput* output, 432 CanonOutput* output,
430 url_parse::Component* out_path); 433 url_parse::Component* out_path);
431 bool CanonicalizePath(const char16* spec, 434 URL_EXPORT bool CanonicalizePath(const char16* spec,
432 const url_parse::Component& path, 435 const url_parse::Component& path,
433 CanonOutput* output, 436 CanonOutput* output,
434 url_parse::Component* out_path); 437 url_parse::Component* out_path);
435 438
436 // Canonicalizes the input as a file path. This is like CanonicalizePath except 439 // Canonicalizes the input as a file path. This is like CanonicalizePath except
437 // that it also handles Windows drive specs. For example, the path can begin 440 // that it also handles Windows drive specs. For example, the path can begin
438 // with "c|\" and it will get properly canonicalized to "C:/". 441 // with "c|\" and it will get properly canonicalized to "C:/".
439 // The string will be appended to |*output| and |*out_path| will be updated. 442 // The string will be appended to |*output| and |*out_path| will be updated.
440 // 443 //
441 // The 8-bit version requires UTF-8 encoding. 444 // The 8-bit version requires UTF-8 encoding.
442 bool FileCanonicalizePath(const char* spec, 445 URL_EXPORT bool FileCanonicalizePath(const char* spec,
443 const url_parse::Component& path, 446 const url_parse::Component& path,
444 CanonOutput* output, 447 CanonOutput* output,
445 url_parse::Component* out_path); 448 url_parse::Component* out_path);
446 bool FileCanonicalizePath(const char16* spec, 449 URL_EXPORT bool FileCanonicalizePath(const char16* spec,
447 const url_parse::Component& path, 450 const url_parse::Component& path,
448 CanonOutput* output, 451 CanonOutput* output,
449 url_parse::Component* out_path); 452 url_parse::Component* out_path);
450 453
451 // Query: Prepends the ? if needed. 454 // Query: Prepends the ? if needed.
452 // 455 //
453 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly 456 // The 8-bit version requires the input to be UTF-8 encoding. Incorrectly
454 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode 457 // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode
455 // "invalid character." This function can not fail, we always just try to do 458 // "invalid character." This function can not fail, we always just try to do
456 // our best for crazy input here since web pages can set it themselves. 459 // our best for crazy input here since web pages can set it themselves.
457 // 460 //
458 // This will convert the given input into the output encoding that the given 461 // This will convert the given input into the output encoding that the given
459 // character set converter object provides. The converter will only be called 462 // character set converter object provides. The converter will only be called
460 // if necessary, for ASCII input, no conversions are necessary. 463 // if necessary, for ASCII input, no conversions are necessary.
461 // 464 //
462 // The converter can be NULL. In this case, the output encoding will be UTF-8. 465 // The converter can be NULL. In this case, the output encoding will be UTF-8.
463 void CanonicalizeQuery(const char* spec, 466 URL_EXPORT void CanonicalizeQuery(const char* spec,
464 const url_parse::Component& query, 467 const url_parse::Component& query,
465 CharsetConverter* converter, 468 CharsetConverter* converter,
466 CanonOutput* output, 469 CanonOutput* output,
467 url_parse::Component* out_query); 470 url_parse::Component* out_query);
468 void CanonicalizeQuery(const char16* spec, 471 URL_EXPORT void CanonicalizeQuery(const char16* spec,
469 const url_parse::Component& query, 472 const url_parse::Component& query,
470 CharsetConverter* converter, 473 CharsetConverter* converter,
471 CanonOutput* output, 474 CanonOutput* output,
472 url_parse::Component* out_query); 475 url_parse::Component* out_query);
473 476
474 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only 477 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
475 // canonicalizer that does not produce ASCII output). The output is 478 // canonicalizer that does not produce ASCII output). The output is
476 // guaranteed to be valid UTF-8. 479 // guaranteed to be valid UTF-8.
477 // 480 //
478 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use 481 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
479 // the "Unicode replacement character" for the confusing bits and copy the rest. 482 // the "Unicode replacement character" for the confusing bits and copy the rest.
480 void CanonicalizeRef(const char* spec, 483 URL_EXPORT void CanonicalizeRef(const char* spec,
481 const url_parse::Component& path, 484 const url_parse::Component& path,
482 CanonOutput* output, 485 CanonOutput* output,
483 url_parse::Component* out_path); 486 url_parse::Component* out_path);
484 void CanonicalizeRef(const char16* spec, 487 URL_EXPORT void CanonicalizeRef(const char16* spec,
485 const url_parse::Component& path, 488 const url_parse::Component& path,
486 CanonOutput* output, 489 CanonOutput* output,
487 url_parse::Component* out_path); 490 url_parse::Component* out_path);
488 491
489 // Full canonicalizer --------------------------------------------------------- 492 // Full canonicalizer ---------------------------------------------------------
490 // 493 //
491 // These functions replace any string contents, rather than append as above. 494 // These functions replace any string contents, rather than append as above.
492 // See the above piece-by-piece functions for information specific to 495 // See the above piece-by-piece functions for information specific to
493 // canonicalizing individual components. 496 // canonicalizing individual components.
494 // 497 //
495 // The output will be ASCII except the reference fragment, which may be UTF-8. 498 // The output will be ASCII except the reference fragment, which may be UTF-8.
496 // 499 //
497 // The 8-bit versions require UTF-8 encoding. 500 // The 8-bit versions require UTF-8 encoding.
498 501
499 // Use for standard URLs with authorities and paths. 502 // Use for standard URLs with authorities and paths.
500 bool CanonicalizeStandardURL(const char* spec, 503 URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
501 int spec_len, 504 int spec_len,
502 const url_parse::Parsed& parsed, 505 const url_parse::Parsed& parsed,
503 CharsetConverter* query_converter, 506 CharsetConverter* query_converter,
504 CanonOutput* output, 507 CanonOutput* output,
505 url_parse::Parsed* new_parsed); 508 url_parse::Parsed* new_parsed);
506 bool CanonicalizeStandardURL(const char16* spec, 509 URL_EXPORT bool CanonicalizeStandardURL(const char16* spec,
507 int spec_len, 510 int spec_len,
508 const url_parse::Parsed& parsed, 511 const url_parse::Parsed& parsed,
509 CharsetConverter* query_converter, 512 CharsetConverter* query_converter,
510 CanonOutput* output, 513 CanonOutput* output,
511 url_parse::Parsed* new_parsed); 514 url_parse::Parsed* new_parsed);
512 515
513 // Use for file URLs. 516 // Use for file URLs.
514 bool CanonicalizeFileURL(const char* spec, 517 URL_EXPORT bool CanonicalizeFileURL(const char* spec,
515 int spec_len, 518 int spec_len,
516 const url_parse::Parsed& parsed, 519 const url_parse::Parsed& parsed,
517 CharsetConverter* query_converter, 520 CharsetConverter* query_converter,
518 CanonOutput* output, 521 CanonOutput* output,
519 url_parse::Parsed* new_parsed); 522 url_parse::Parsed* new_parsed);
520 bool CanonicalizeFileURL(const char16* spec, 523 URL_EXPORT bool CanonicalizeFileURL(const char16* spec,
521 int spec_len, 524 int spec_len,
522 const url_parse::Parsed& parsed, 525 const url_parse::Parsed& parsed,
523 CharsetConverter* query_converter, 526 CharsetConverter* query_converter,
524 CanonOutput* output, 527 CanonOutput* output,
525 url_parse::Parsed* new_parsed); 528 url_parse::Parsed* new_parsed);
526 529
527 // Use for filesystem URLs. 530 // Use for filesystem URLs.
528 bool CanonicalizeFileSystemURL(const char* spec, 531 URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,
529 int spec_len, 532 int spec_len,
530 const url_parse::Parsed& parsed, 533 const url_parse::Parsed& parsed,
531 CharsetConverter* query_converter, 534 CharsetConverter* query_converter,
532 CanonOutput* output, 535 CanonOutput* output,
533 url_parse::Parsed* new_parsed); 536 url_parse::Parsed* new_parsed);
534 bool CanonicalizeFileSystemURL(const char16* spec, 537 URL_EXPORT bool CanonicalizeFileSystemURL(const char16* spec,
535 int spec_len, 538 int spec_len,
536 const url_parse::Parsed& parsed, 539 const url_parse::Parsed& parsed,
537 CharsetConverter* query_converter, 540 CharsetConverter* query_converter,
538 CanonOutput* output, 541 CanonOutput* output,
539 url_parse::Parsed* new_parsed); 542 url_parse::Parsed* new_parsed);
540 543
541 // Use for path URLs such as javascript. This does not modify the path in any 544 // Use for path URLs such as javascript. This does not modify the path in any
542 // way, for example, by escaping it. 545 // way, for example, by escaping it.
543 bool CanonicalizePathURL(const char* spec, 546 URL_EXPORT bool CanonicalizePathURL(const char* spec,
544 int spec_len, 547 int spec_len,
545 const url_parse::Parsed& parsed, 548 const url_parse::Parsed& parsed,
546 CanonOutput* output, 549 CanonOutput* output,
547 url_parse::Parsed* new_parsed); 550 url_parse::Parsed* new_parsed);
548 bool CanonicalizePathURL(const char16* spec, 551 URL_EXPORT bool CanonicalizePathURL(const char16* spec,
549 int spec_len, 552 int spec_len,
550 const url_parse::Parsed& parsed, 553 const url_parse::Parsed& parsed,
551 CanonOutput* output, 554 CanonOutput* output,
552 url_parse::Parsed* new_parsed); 555 url_parse::Parsed* new_parsed);
553 556
554 // Use for mailto URLs. This "canonicalizes" the url into a path and query 557 // Use for mailto URLs. This "canonicalizes" the url into a path and query
555 // component. It does not attempt to merge "to" fields. It uses UTF-8 for 558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
556 // the query encoding if there is a query. This is because a mailto URL is 559 // the query encoding if there is a query. This is because a mailto URL is
557 // really intended for an external mail program, and the encoding of a page, 560 // really intended for an external mail program, and the encoding of a page,
558 // etc. which would influence a query encoding normally are irrelevant. 561 // etc. which would influence a query encoding normally are irrelevant.
559 bool CanonicalizeMailtoURL(const char* spec, 562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
560 int spec_len, 563 int spec_len,
561 const url_parse::Parsed& parsed, 564 const url_parse::Parsed& parsed,
562 CanonOutput* output, 565 CanonOutput* output,
563 url_parse::Parsed* new_parsed); 566 url_parse::Parsed* new_parsed);
564 bool CanonicalizeMailtoURL(const char16* spec, 567 URL_EXPORT bool CanonicalizeMailtoURL(const char16* spec,
565 int spec_len, 568 int spec_len,
566 const url_parse::Parsed& parsed, 569 const url_parse::Parsed& parsed,
567 CanonOutput* output, 570 CanonOutput* output,
568 url_parse::Parsed* new_parsed); 571 url_parse::Parsed* new_parsed);
569 572
570 // Part replacer -------------------------------------------------------------- 573 // Part replacer --------------------------------------------------------------
571 574
572 // Internal structure used for storing separate strings for each component. 575 // Internal structure used for storing separate strings for each component.
573 // The basic canonicalization functions use this structure internally so that 576 // The basic canonicalization functions use this structure internally so that
574 // component replacement (different strings for different components) can be 577 // component replacement (different strings for different components) can be
575 // treated on the same code path as regular canonicalization (the same string 578 // treated on the same code path as regular canonicalization (the same string
576 // for each component). 579 // for each component).
577 // 580 //
578 // A url_parse::Parsed structure usually goes along with this. Those 581 // A url_parse::Parsed structure usually goes along with this. Those
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
743 // Replace component | (replacement string) (replacement component) 746 // Replace component | (replacement string) (replacement component)
744 // Delete component | (non-NULL) (invalid component: (0,-1)) 747 // Delete component | (non-NULL) (invalid component: (0,-1))
745 // 748 //
746 // We use a pointer to the empty string for the source when the component 749 // We use a pointer to the empty string for the source when the component
747 // should be deleted. 750 // should be deleted.
748 URLComponentSource<CHAR> sources_; 751 URLComponentSource<CHAR> sources_;
749 url_parse::Parsed components_; 752 url_parse::Parsed components_;
750 }; 753 };
751 754
752 // The base must be an 8-bit canonical URL. 755 // The base must be an 8-bit canonical URL.
753 bool ReplaceStandardURL(const char* base, 756 URL_EXPORT bool ReplaceStandardURL(const char* base,
754 const url_parse::Parsed& base_parsed, 757 const url_parse::Parsed& base_parsed,
755 const Replacements<char>& replacements, 758 const Replacements<char>& replacements,
756 CharsetConverter* query_converter, 759 CharsetConverter* query_converter,
757 CanonOutput* output, 760 CanonOutput* output,
758 url_parse::Parsed* new_parsed); 761 url_parse::Parsed* new_parsed);
759 bool ReplaceStandardURL(const char* base, 762 URL_EXPORT bool ReplaceStandardURL(const char* base,
760 const url_parse::Parsed& base_parsed, 763 const url_parse::Parsed& base_parsed,
761 const Replacements<char16>& replacements, 764 const Replacements<char16>& replacements,
762 CharsetConverter* query_converter, 765 CharsetConverter* query_converter,
763 CanonOutput* output, 766 CanonOutput* output,
764 url_parse::Parsed* new_parsed); 767 url_parse::Parsed* new_parsed);
765 768
766 // Filesystem URLs can only have the path, query, or ref replaced. 769 // Filesystem URLs can only have the path, query, or ref replaced.
767 // All other components will be ignored. 770 // All other components will be ignored.
768 bool ReplaceFileSystemURL(const char* base, 771 URL_EXPORT bool ReplaceFileSystemURL(const char* base,
769 const url_parse::Parsed& base_parsed, 772 const url_parse::Parsed& base_parsed,
770 const Replacements<char>& replacements, 773 const Replacements<char>& replacements,
771 CharsetConverter* query_converter, 774 CharsetConverter* query_converter,
772 CanonOutput* output, 775 CanonOutput* output,
773 url_parse::Parsed* new_parsed); 776 url_parse::Parsed* new_parsed);
774 bool ReplaceFileSystemURL(const char* base, 777 URL_EXPORT bool ReplaceFileSystemURL(const char* base,
775 const url_parse::Parsed& base_parsed, 778 const url_parse::Parsed& base_parsed,
776 const Replacements<char16>& replacements, 779 const Replacements<char16>& replacements,
777 CharsetConverter* query_converter, 780 CharsetConverter* query_converter,
778 CanonOutput* output, 781 CanonOutput* output,
779 url_parse::Parsed* new_parsed); 782 url_parse::Parsed* new_parsed);
780 783
781 // Replacing some parts of a file URL is not permitted. Everything except 784 // Replacing some parts of a file URL is not permitted. Everything except
782 // the host, path, query, and ref will be ignored. 785 // the host, path, query, and ref will be ignored.
783 bool ReplaceFileURL(const char* base, 786 URL_EXPORT bool ReplaceFileURL(const char* base,
784 const url_parse::Parsed& base_parsed, 787 const url_parse::Parsed& base_parsed,
785 const Replacements<char>& replacements, 788 const Replacements<char>& replacements,
786 CharsetConverter* query_converter, 789 CharsetConverter* query_converter,
787 CanonOutput* output, 790 CanonOutput* output,
788 url_parse::Parsed* new_parsed); 791 url_parse::Parsed* new_parsed);
789 bool ReplaceFileURL(const char* base, 792 URL_EXPORT bool ReplaceFileURL(const char* base,
790 const url_parse::Parsed& base_parsed, 793 const url_parse::Parsed& base_parsed,
791 const Replacements<char16>& replacements, 794 const Replacements<char16>& replacements,
792 CharsetConverter* query_converter, 795 CharsetConverter* query_converter,
793 CanonOutput* output, 796 CanonOutput* output,
794 url_parse::Parsed* new_parsed); 797 url_parse::Parsed* new_parsed);
795 798
796 // Path URLs can only have the scheme and path replaced. All other components 799 // Path URLs can only have the scheme and path replaced. All other components
797 // will be ignored. 800 // will be ignored.
798 bool ReplacePathURL(const char* base, 801 URL_EXPORT bool ReplacePathURL(const char* base,
799 const url_parse::Parsed& base_parsed, 802 const url_parse::Parsed& base_parsed,
800 const Replacements<char>& replacements, 803 const Replacements<char>& replacements,
801 CanonOutput* output, 804 CanonOutput* output,
802 url_parse::Parsed* new_parsed); 805 url_parse::Parsed* new_parsed);
803 bool ReplacePathURL(const char* base, 806 URL_EXPORT bool ReplacePathURL(const char* base,
804 const url_parse::Parsed& base_parsed, 807 const url_parse::Parsed& base_parsed,
805 const Replacements<char16>& replacements, 808 const Replacements<char16>& replacements,
806 CanonOutput* output, 809 CanonOutput* output,
807 url_parse::Parsed* new_parsed); 810 url_parse::Parsed* new_parsed);
808 811
809 // Mailto URLs can only have the scheme, path, and query replaced. 812 // Mailto URLs can only have the scheme, path, and query replaced.
810 // All other components will be ignored. 813 // All other components will be ignored.
811 bool ReplaceMailtoURL(const char* base, 814 URL_EXPORT bool ReplaceMailtoURL(const char* base,
812 const url_parse::Parsed& base_parsed, 815 const url_parse::Parsed& base_parsed,
813 const Replacements<char>& replacements, 816 const Replacements<char>& replacements,
814 CanonOutput* output, 817 CanonOutput* output,
815 url_parse::Parsed* new_parsed); 818 url_parse::Parsed* new_parsed);
816 bool ReplaceMailtoURL(const char* base, 819 URL_EXPORT bool ReplaceMailtoURL(const char* base,
817 const url_parse::Parsed& base_parsed, 820 const url_parse::Parsed& base_parsed,
818 const Replacements<char16>& replacements, 821 const Replacements<char16>& replacements,
819 CanonOutput* output, 822 CanonOutput* output,
820 url_parse::Parsed* new_parsed); 823 url_parse::Parsed* new_parsed);
821 824
822 // Relative URL --------------------------------------------------------------- 825 // Relative URL ---------------------------------------------------------------
823 826
824 // Given an input URL or URL fragment |fragment|, determines if it is a 827 // Given an input URL or URL fragment |fragment|, determines if it is a
825 // relative or absolute URL and places the result into |*is_relative|. If it is 828 // relative or absolute URL and places the result into |*is_relative|. If it is
826 // relative, the relevant portion of the URL will be placed into 829 // relative, the relevant portion of the URL will be placed into
827 // |*relative_component| (there may have been trimmed whitespace, for example). 830 // |*relative_component| (there may have been trimmed whitespace, for example).
828 // This value is passed to ResolveRelativeURL. If the input is not relative, 831 // This value is passed to ResolveRelativeURL. If the input is not relative,
829 // this value is UNDEFINED (it may be changed by the function). 832 // this value is UNDEFINED (it may be changed by the function).
830 // 833 //
831 // Returns true on success (we successfully determined the URL is relative or 834 // Returns true on success (we successfully determined the URL is relative or
832 // not). Failure means that the combination of URLs doesn't make any sense. 835 // not). Failure means that the combination of URLs doesn't make any sense.
833 // 836 //
834 // The base URL should always be canonical, therefore is ASCII. 837 // The base URL should always be canonical, therefore is ASCII.
835 bool IsRelativeURL(const char* base, 838 URL_EXPORT bool IsRelativeURL(const char* base,
836 const url_parse::Parsed& base_parsed, 839 const url_parse::Parsed& base_parsed,
837 const char* fragment, 840 const char* fragment,
838 int fragment_len, 841 int fragment_len,
839 bool is_base_hierarchical, 842 bool is_base_hierarchical,
840 bool* is_relative, 843 bool* is_relative,
841 url_parse::Component* relative_component); 844 url_parse::Component* relative_component);
842 bool IsRelativeURL(const char* base, 845 URL_EXPORT bool IsRelativeURL(const char* base,
843 const url_parse::Parsed& base_parsed, 846 const url_parse::Parsed& base_parsed,
844 const char16* fragment, 847 const char16* fragment,
845 int fragment_len, 848 int fragment_len,
846 bool is_base_hierarchical, 849 bool is_base_hierarchical,
847 bool* is_relative, 850 bool* is_relative,
848 url_parse::Component* relative_component); 851 url_parse::Component* relative_component);
849 852
850 // Given a canonical parsed source URL, a URL fragment known to be relative, 853 // Given a canonical parsed source URL, a URL fragment known to be relative,
851 // and the identified relevant portion of the relative URL (computed by 854 // and the identified relevant portion of the relative URL (computed by
852 // IsRelativeURL), this produces a new parsed canonical URL in |output| and 855 // IsRelativeURL), this produces a new parsed canonical URL in |output| and
853 // |out_parsed|. 856 // |out_parsed|.
854 // 857 //
855 // It also requires a flag indicating whether the base URL is a file: URL 858 // It also requires a flag indicating whether the base URL is a file: URL
856 // which triggers additional logic. 859 // which triggers additional logic.
857 // 860 //
858 // The base URL should be canonical and have a host (may be empty for file 861 // The base URL should be canonical and have a host (may be empty for file
859 // URLs) and a path. If it doesn't have these, we can't resolve relative 862 // URLs) and a path. If it doesn't have these, we can't resolve relative
860 // URLs off of it and will return the base as the output with an error flag. 863 // URLs off of it and will return the base as the output with an error flag.
861 // Because it is canonical, it should also be ASCII. 864 // Because it is canonical, it should also be ASCII.
862 // 865 //
863 // The query charset converter follows the same rules as CanonicalizeQuery. 866 // The query charset converter follows the same rules as CanonicalizeQuery.
864 // 867 //
865 // Returns true on success. On failure, the output will be "something 868 // Returns true on success. On failure, the output will be "something
866 // reasonable" that will be consistent and valid, just probably not what 869 // reasonable" that will be consistent and valid, just probably not what
867 // was intended by the web page author or caller. 870 // was intended by the web page author or caller.
868 bool ResolveRelativeURL(const char* base_url, 871 URL_EXPORT bool ResolveRelativeURL(
869 const url_parse::Parsed& base_parsed, 872 const char* base_url,
870 bool base_is_file, 873 const url_parse::Parsed& base_parsed,
871 const char* relative_url, 874 bool base_is_file,
872 const url_parse::Component& relative_component, 875 const char* relative_url,
873 CharsetConverter* query_converter, 876 const url_parse::Component& relative_component,
874 CanonOutput* output, 877 CharsetConverter* query_converter,
875 url_parse::Parsed* out_parsed); 878 CanonOutput* output,
876 bool ResolveRelativeURL(const char* base_url, 879 url_parse::Parsed* out_parsed);
877 const url_parse::Parsed& base_parsed, 880 URL_EXPORT bool ResolveRelativeURL(
878 bool base_is_file, 881 const char* base_url,
879 const char16* relative_url, 882 const url_parse::Parsed& base_parsed,
880 const url_parse::Component& relative_component, 883 bool base_is_file,
881 CharsetConverter* query_converter, 884 const char16* relative_url,
882 CanonOutput* output, 885 const url_parse::Component& relative_component,
883 url_parse::Parsed* out_parsed); 886 CharsetConverter* query_converter,
887 CanonOutput* output,
888 url_parse::Parsed* out_parsed);
884 889
885 } // namespace url_canon 890 } // namespace url_canon
886 891
887 #endif // URL_URL_CANON_H_ 892 #endif // URL_URL_CANON_H_
OLDNEW
« no previous file with comments | « trunk/src/url/url.gyp ('k') | trunk/src/url/url_canon_icu.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698