OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
| 7 #include <algorithm> |
7 #include <map> | 8 #include <map> |
8 #include <vector> | 9 #include <utility> |
9 | 10 |
10 #include "base/i18n/time_formatting.h" | |
11 #include "base/json/string_escape.h" | |
12 #include "base/lazy_instance.h" | 11 #include "base/lazy_instance.h" |
13 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/macros.h" |
14 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
16 #include "base/strings/string_tokenizer.h" | 16 #include "base/strings/string_tokenizer.h" |
17 #include "base/strings/string_util.h" | 17 #include "base/strings/string_util.h" |
18 #include "base/strings/utf_offset_string_conversions.h" | 18 #include "base/strings/utf_offset_string_conversions.h" |
19 #include "base/strings/utf_string_conversions.h" | 19 #include "base/strings/utf_string_conversions.h" |
20 #include "base/time/time.h" | 20 #include "base/synchronization/lock.h" |
21 #include "url/gurl.h" | |
22 #include "third_party/icu/source/common/unicode/uidna.h" | 21 #include "third_party/icu/source/common/unicode/uidna.h" |
23 #include "third_party/icu/source/common/unicode/uniset.h" | 22 #include "third_party/icu/source/common/unicode/uniset.h" |
24 #include "third_party/icu/source/common/unicode/uscript.h" | 23 #include "third_party/icu/source/common/unicode/uscript.h" |
25 #include "third_party/icu/source/common/unicode/uset.h" | |
26 #include "third_party/icu/source/i18n/unicode/datefmt.h" | |
27 #include "third_party/icu/source/i18n/unicode/regex.h" | 24 #include "third_party/icu/source/i18n/unicode/regex.h" |
28 #include "third_party/icu/source/i18n/unicode/ulocdata.h" | 25 #include "third_party/icu/source/i18n/unicode/ulocdata.h" |
29 | 26 #include "url/gurl.h" |
30 using base::Time; | 27 #include "url/third_party/mozilla/url_parse.h" |
31 | 28 |
32 namespace net { | 29 namespace url_formatter { |
33 | 30 |
34 namespace { | 31 namespace { |
35 | 32 |
36 typedef std::vector<size_t> Offsets; | 33 base::string16 IDNToUnicodeWithAdjustments( |
| 34 const std::string& host, |
| 35 const std::string& languages, |
| 36 base::OffsetAdjuster::Adjustments* adjustments); |
| 37 bool IDNToUnicodeOneComponent(const base::char16* comp, |
| 38 size_t comp_len, |
| 39 const std::string& languages, |
| 40 base::string16* out); |
| 41 |
| 42 class AppendComponentTransform { |
| 43 public: |
| 44 AppendComponentTransform() {} |
| 45 virtual ~AppendComponentTransform() {} |
| 46 |
| 47 virtual base::string16 Execute( |
| 48 const std::string& component_text, |
| 49 base::OffsetAdjuster::Adjustments* adjustments) const = 0; |
| 50 |
| 51 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
| 52 // accessible copy constructor in order to call AppendFormattedComponent() |
| 53 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). |
| 54 }; |
| 55 |
| 56 class HostComponentTransform : public AppendComponentTransform { |
| 57 public: |
| 58 explicit HostComponentTransform(const std::string& languages) |
| 59 : languages_(languages) {} |
| 60 |
| 61 private: |
| 62 base::string16 Execute( |
| 63 const std::string& component_text, |
| 64 base::OffsetAdjuster::Adjustments* adjustments) const override { |
| 65 return IDNToUnicodeWithAdjustments(component_text, languages_, |
| 66 adjustments); |
| 67 } |
| 68 |
| 69 const std::string& languages_; |
| 70 }; |
| 71 |
| 72 class NonHostComponentTransform : public AppendComponentTransform { |
| 73 public: |
| 74 explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) |
| 75 : unescape_rules_(unescape_rules) { |
| 76 } |
| 77 |
| 78 private: |
| 79 base::string16 Execute( |
| 80 const std::string& component_text, |
| 81 base::OffsetAdjuster::Adjustments* adjustments) const override { |
| 82 return (unescape_rules_ == net::UnescapeRule::NONE) ? |
| 83 base::UTF8ToUTF16WithAdjustments(component_text, adjustments) : |
| 84 net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(component_text, |
| 85 unescape_rules_, adjustments); |
| 86 } |
| 87 |
| 88 const net::UnescapeRule::Type unescape_rules_; |
| 89 }; |
| 90 |
| 91 // Transforms the portion of |spec| covered by |original_component| according to |
| 92 // |transform|. Appends the result to |output|. If |output_component| is |
| 93 // non-NULL, its start and length are set to the transformed component's new |
| 94 // start and length. If |adjustments| is non-NULL, appends adjustments (if |
| 95 // any) that reflect the transformation the original component underwent to |
| 96 // become the transformed value appended to |output|. |
| 97 void AppendFormattedComponent(const std::string& spec, |
| 98 const url::Component& original_component, |
| 99 const AppendComponentTransform& transform, |
| 100 base::string16* output, |
| 101 url::Component* output_component, |
| 102 base::OffsetAdjuster::Adjustments* adjustments) { |
| 103 DCHECK(output); |
| 104 if (original_component.is_nonempty()) { |
| 105 size_t original_component_begin = |
| 106 static_cast<size_t>(original_component.begin); |
| 107 size_t output_component_begin = output->length(); |
| 108 std::string component_str(spec, original_component_begin, |
| 109 static_cast<size_t>(original_component.len)); |
| 110 |
| 111 // Transform |component_str| and modify |adjustments| appropriately. |
| 112 base::OffsetAdjuster::Adjustments component_transform_adjustments; |
| 113 output->append( |
| 114 transform.Execute(component_str, &component_transform_adjustments)); |
| 115 |
| 116 // Shift all the adjustments made for this component so the offsets are |
| 117 // valid for the original string and add them to |adjustments|. |
| 118 for (base::OffsetAdjuster::Adjustments::iterator comp_iter = |
| 119 component_transform_adjustments.begin(); |
| 120 comp_iter != component_transform_adjustments.end(); ++comp_iter) |
| 121 comp_iter->original_offset += original_component_begin; |
| 122 if (adjustments) { |
| 123 adjustments->insert(adjustments->end(), |
| 124 component_transform_adjustments.begin(), |
| 125 component_transform_adjustments.end()); |
| 126 } |
| 127 |
| 128 // Set positions of the parsed component. |
| 129 if (output_component) { |
| 130 output_component->begin = static_cast<int>(output_component_begin); |
| 131 output_component->len = |
| 132 static_cast<int>(output->length() - output_component_begin); |
| 133 } |
| 134 } else if (output_component) { |
| 135 output_component->reset(); |
| 136 } |
| 137 } |
| 138 |
| 139 // If |component| is valid, its begin is incremented by |delta|. |
| 140 void AdjustComponent(int delta, url::Component* component) { |
| 141 if (!component->is_valid()) |
| 142 return; |
| 143 |
| 144 DCHECK(delta >= 0 || component->begin >= -delta); |
| 145 component->begin += delta; |
| 146 } |
| 147 |
| 148 // Adjusts all the components of |parsed| by |delta|, except for the scheme. |
| 149 void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) { |
| 150 AdjustComponent(delta, &(parsed->username)); |
| 151 AdjustComponent(delta, &(parsed->password)); |
| 152 AdjustComponent(delta, &(parsed->host)); |
| 153 AdjustComponent(delta, &(parsed->port)); |
| 154 AdjustComponent(delta, &(parsed->path)); |
| 155 AdjustComponent(delta, &(parsed->query)); |
| 156 AdjustComponent(delta, &(parsed->ref)); |
| 157 } |
| 158 |
| 159 // Helper for FormatUrlWithOffsets(). |
| 160 base::string16 FormatViewSourceUrl( |
| 161 const GURL& url, |
| 162 const std::string& languages, |
| 163 FormatUrlTypes format_types, |
| 164 net::UnescapeRule::Type unescape_rules, |
| 165 url::Parsed* new_parsed, |
| 166 size_t* prefix_end, |
| 167 base::OffsetAdjuster::Adjustments* adjustments) { |
| 168 DCHECK(new_parsed); |
| 169 const char kViewSource[] = "view-source:"; |
| 170 const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
| 171 |
| 172 // Format the underlying URL and record adjustments. |
| 173 const std::string& url_str(url.possibly_invalid_spec()); |
| 174 adjustments->clear(); |
| 175 base::string16 result(base::ASCIIToUTF16(kViewSource) + |
| 176 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), |
| 177 languages, format_types, unescape_rules, |
| 178 new_parsed, prefix_end, adjustments)); |
| 179 // Revise |adjustments| by shifting the offsets to cover the prefix that the |
| 180 // above call to FormatUrl didn't get to see. |
| 181 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); |
| 182 it != adjustments->end(); ++it) |
| 183 it->original_offset += kViewSourceLength; |
| 184 |
| 185 // Adjust positions of the parsed components. |
| 186 if (new_parsed->scheme.is_nonempty()) { |
| 187 // Assume "view-source:real-scheme" as a scheme. |
| 188 new_parsed->scheme.len += kViewSourceLength; |
| 189 } else { |
| 190 new_parsed->scheme.begin = 0; |
| 191 new_parsed->scheme.len = kViewSourceLength - 1; |
| 192 } |
| 193 AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); |
| 194 |
| 195 if (prefix_end) |
| 196 *prefix_end += kViewSourceLength; |
| 197 |
| 198 return result; |
| 199 } |
| 200 |
| 201 // TODO(brettw) bug 734373: check the scripts for each host component and |
| 202 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
| 203 // scripts that the user has installed. For now, just put the entire |
| 204 // path through IDN. Maybe this feature can be implemented in ICU itself? |
| 205 // |
| 206 // We may want to skip this step in the case of file URLs to allow unicode |
| 207 // UNC hostnames regardless of encodings. |
| 208 base::string16 IDNToUnicodeWithAdjustments( |
| 209 const std::string& host, |
| 210 const std::string& languages, |
| 211 base::OffsetAdjuster::Adjustments* adjustments) { |
| 212 if (adjustments) |
| 213 adjustments->clear(); |
| 214 // Convert the ASCII input to a base::string16 for ICU. |
| 215 base::string16 input16; |
| 216 input16.reserve(host.length()); |
| 217 input16.insert(input16.end(), host.begin(), host.end()); |
| 218 |
| 219 // Do each component of the host separately, since we enforce script matching |
| 220 // on a per-component basis. |
| 221 base::string16 out16; |
| 222 for (size_t component_start = 0, component_end; |
| 223 component_start < input16.length(); |
| 224 component_start = component_end + 1) { |
| 225 // Find the end of the component. |
| 226 component_end = input16.find('.', component_start); |
| 227 if (component_end == base::string16::npos) |
| 228 component_end = input16.length(); // For getting the last component. |
| 229 size_t component_length = component_end - component_start; |
| 230 size_t new_component_start = out16.length(); |
| 231 bool converted_idn = false; |
| 232 if (component_end > component_start) { |
| 233 // Add the substring that we just found. |
| 234 converted_idn = IDNToUnicodeOneComponent( |
| 235 input16.data() + component_start, component_length, languages, |
| 236 &out16); |
| 237 } |
| 238 size_t new_component_length = out16.length() - new_component_start; |
| 239 |
| 240 if (converted_idn && adjustments) { |
| 241 adjustments->push_back(base::OffsetAdjuster::Adjustment( |
| 242 component_start, component_length, new_component_length)); |
| 243 } |
| 244 |
| 245 // Need to add the dot we just found (if we found one). |
| 246 if (component_end < input16.length()) |
| 247 out16.push_back('.'); |
| 248 } |
| 249 return out16; |
| 250 } |
37 | 251 |
38 // Does some simple normalization of scripts so we can allow certain scripts | 252 // Does some simple normalization of scripts so we can allow certain scripts |
39 // to exist together. | 253 // to exist together. |
40 // TODO(brettw) bug 880223: we should allow some other languages to be | 254 // TODO(brettw) bug 880223: we should allow some other languages to be |
41 // combined such as Chinese and Latin. We will probably need a more | 255 // combined such as Chinese and Latin. We will probably need a more |
42 // complicated system of language pairs to have more fine-grained control. | 256 // complicated system of language pairs to have more fine-grained control. |
43 UScriptCode NormalizeScript(UScriptCode code) { | 257 UScriptCode NormalizeScript(UScriptCode code) { |
44 switch (code) { | 258 switch (code) { |
45 case USCRIPT_KATAKANA: | 259 case USCRIPT_KATAKANA: |
46 case USCRIPT_HIRAGANA: | 260 case USCRIPT_HIRAGANA: |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
353 // Something went wrong. Revert to original string. | 567 // Something went wrong. Revert to original string. |
354 out->resize(original_length); | 568 out->resize(original_length); |
355 } | 569 } |
356 | 570 |
357 // We get here with no IDN or on error, in which case we just append the | 571 // We get here with no IDN or on error, in which case we just append the |
358 // literal input. | 572 // literal input. |
359 out->append(comp, comp_len); | 573 out->append(comp, comp_len); |
360 return false; | 574 return false; |
361 } | 575 } |
362 | 576 |
363 // TODO(brettw) bug 734373: check the scripts for each host component and | |
364 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
365 // scripts that the user has installed. For now, just put the entire | |
366 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
367 // | |
368 // We may want to skip this step in the case of file URLs to allow unicode | |
369 // UNC hostnames regardless of encodings. | |
370 base::string16 IDNToUnicodeWithAdjustments( | |
371 const std::string& host, | |
372 const std::string& languages, | |
373 base::OffsetAdjuster::Adjustments* adjustments) { | |
374 if (adjustments) | |
375 adjustments->clear(); | |
376 // Convert the ASCII input to a base::string16 for ICU. | |
377 base::string16 input16; | |
378 input16.reserve(host.length()); | |
379 input16.insert(input16.end(), host.begin(), host.end()); | |
380 | |
381 // Do each component of the host separately, since we enforce script matching | |
382 // on a per-component basis. | |
383 base::string16 out16; | |
384 { | |
385 for (size_t component_start = 0, component_end; | |
386 component_start < input16.length(); | |
387 component_start = component_end + 1) { | |
388 // Find the end of the component. | |
389 component_end = input16.find('.', component_start); | |
390 if (component_end == base::string16::npos) | |
391 component_end = input16.length(); // For getting the last component. | |
392 size_t component_length = component_end - component_start; | |
393 size_t new_component_start = out16.length(); | |
394 bool converted_idn = false; | |
395 if (component_end > component_start) { | |
396 // Add the substring that we just found. | |
397 converted_idn = IDNToUnicodeOneComponent( | |
398 input16.data() + component_start, component_length, languages, | |
399 &out16); | |
400 } | |
401 size_t new_component_length = out16.length() - new_component_start; | |
402 | |
403 if (converted_idn && adjustments) { | |
404 adjustments->push_back(base::OffsetAdjuster::Adjustment( | |
405 component_start, component_length, new_component_length)); | |
406 } | |
407 | |
408 // Need to add the dot we just found (if we found one). | |
409 if (component_end < input16.length()) | |
410 out16.push_back('.'); | |
411 } | |
412 } | |
413 return out16; | |
414 } | |
415 | |
416 // If |component| is valid, its begin is incremented by |delta|. | |
417 void AdjustComponent(int delta, url::Component* component) { | |
418 if (!component->is_valid()) | |
419 return; | |
420 | |
421 DCHECK(delta >= 0 || component->begin >= -delta); | |
422 component->begin += delta; | |
423 } | |
424 | |
425 // Adjusts all the components of |parsed| by |delta|, except for the scheme. | |
426 void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) { | |
427 AdjustComponent(delta, &(parsed->username)); | |
428 AdjustComponent(delta, &(parsed->password)); | |
429 AdjustComponent(delta, &(parsed->host)); | |
430 AdjustComponent(delta, &(parsed->port)); | |
431 AdjustComponent(delta, &(parsed->path)); | |
432 AdjustComponent(delta, &(parsed->query)); | |
433 AdjustComponent(delta, &(parsed->ref)); | |
434 } | |
435 | |
436 // Helper for FormatUrlWithOffsets(). | |
437 base::string16 FormatViewSourceUrl( | |
438 const GURL& url, | |
439 const std::string& languages, | |
440 FormatUrlTypes format_types, | |
441 UnescapeRule::Type unescape_rules, | |
442 url::Parsed* new_parsed, | |
443 size_t* prefix_end, | |
444 base::OffsetAdjuster::Adjustments* adjustments) { | |
445 DCHECK(new_parsed); | |
446 const char kViewSource[] = "view-source:"; | |
447 const size_t kViewSourceLength = arraysize(kViewSource) - 1; | |
448 | |
449 // Format the underlying URL and record adjustments. | |
450 const std::string& url_str(url.possibly_invalid_spec()); | |
451 adjustments->clear(); | |
452 base::string16 result(base::ASCIIToUTF16(kViewSource) + | |
453 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), | |
454 languages, format_types, unescape_rules, | |
455 new_parsed, prefix_end, adjustments)); | |
456 // Revise |adjustments| by shifting the offsets to cover the prefix that the | |
457 // above call to FormatUrl didn't get to see. | |
458 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); | |
459 it != adjustments->end(); ++it) | |
460 it->original_offset += kViewSourceLength; | |
461 | |
462 // Adjust positions of the parsed components. | |
463 if (new_parsed->scheme.is_nonempty()) { | |
464 // Assume "view-source:real-scheme" as a scheme. | |
465 new_parsed->scheme.len += kViewSourceLength; | |
466 } else { | |
467 new_parsed->scheme.begin = 0; | |
468 new_parsed->scheme.len = kViewSourceLength - 1; | |
469 } | |
470 AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); | |
471 | |
472 if (prefix_end) | |
473 *prefix_end += kViewSourceLength; | |
474 | |
475 return result; | |
476 } | |
477 | |
478 class AppendComponentTransform { | |
479 public: | |
480 AppendComponentTransform() {} | |
481 virtual ~AppendComponentTransform() {} | |
482 | |
483 virtual base::string16 Execute( | |
484 const std::string& component_text, | |
485 base::OffsetAdjuster::Adjustments* adjustments) const = 0; | |
486 | |
487 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an | |
488 // accessible copy constructor in order to call AppendFormattedComponent() | |
489 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). | |
490 }; | |
491 | |
492 class HostComponentTransform : public AppendComponentTransform { | |
493 public: | |
494 explicit HostComponentTransform(const std::string& languages) | |
495 : languages_(languages) { | |
496 } | |
497 | |
498 private: | |
499 base::string16 Execute( | |
500 const std::string& component_text, | |
501 base::OffsetAdjuster::Adjustments* adjustments) const override { | |
502 return IDNToUnicodeWithAdjustments(component_text, languages_, | |
503 adjustments); | |
504 } | |
505 | |
506 const std::string& languages_; | |
507 }; | |
508 | |
509 class NonHostComponentTransform : public AppendComponentTransform { | |
510 public: | |
511 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) | |
512 : unescape_rules_(unescape_rules) { | |
513 } | |
514 | |
515 private: | |
516 base::string16 Execute( | |
517 const std::string& component_text, | |
518 base::OffsetAdjuster::Adjustments* adjustments) const override { | |
519 return (unescape_rules_ == UnescapeRule::NONE) ? | |
520 base::UTF8ToUTF16WithAdjustments(component_text, adjustments) : | |
521 UnescapeAndDecodeUTF8URLComponentWithAdjustments(component_text, | |
522 unescape_rules_, adjustments); | |
523 } | |
524 | |
525 const UnescapeRule::Type unescape_rules_; | |
526 }; | |
527 | |
528 // Transforms the portion of |spec| covered by |original_component| according to | |
529 // |transform|. Appends the result to |output|. If |output_component| is | |
530 // non-NULL, its start and length are set to the transformed component's new | |
531 // start and length. If |adjustments| is non-NULL, appends adjustments (if | |
532 // any) that reflect the transformation the original component underwent to | |
533 // become the transformed value appended to |output|. | |
534 void AppendFormattedComponent(const std::string& spec, | |
535 const url::Component& original_component, | |
536 const AppendComponentTransform& transform, | |
537 base::string16* output, | |
538 url::Component* output_component, | |
539 base::OffsetAdjuster::Adjustments* adjustments) { | |
540 DCHECK(output); | |
541 if (original_component.is_nonempty()) { | |
542 size_t original_component_begin = | |
543 static_cast<size_t>(original_component.begin); | |
544 size_t output_component_begin = output->length(); | |
545 std::string component_str(spec, original_component_begin, | |
546 static_cast<size_t>(original_component.len)); | |
547 | |
548 // Transform |component_str| and modify |adjustments| appropriately. | |
549 base::OffsetAdjuster::Adjustments component_transform_adjustments; | |
550 output->append( | |
551 transform.Execute(component_str, &component_transform_adjustments)); | |
552 | |
553 // Shift all the adjustments made for this component so the offsets are | |
554 // valid for the original string and add them to |adjustments|. | |
555 for (base::OffsetAdjuster::Adjustments::iterator comp_iter = | |
556 component_transform_adjustments.begin(); | |
557 comp_iter != component_transform_adjustments.end(); ++comp_iter) | |
558 comp_iter->original_offset += original_component_begin; | |
559 if (adjustments) { | |
560 adjustments->insert(adjustments->end(), | |
561 component_transform_adjustments.begin(), | |
562 component_transform_adjustments.end()); | |
563 } | |
564 | |
565 // Set positions of the parsed component. | |
566 if (output_component) { | |
567 output_component->begin = static_cast<int>(output_component_begin); | |
568 output_component->len = | |
569 static_cast<int>(output->length() - output_component_begin); | |
570 } | |
571 } else if (output_component) { | |
572 output_component->reset(); | |
573 } | |
574 } | |
575 | |
576 } // namespace | 577 } // namespace |
577 | 578 |
578 const FormatUrlType kFormatUrlOmitNothing = 0; | 579 const FormatUrlType kFormatUrlOmitNothing = 0; |
579 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 580 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
580 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 581 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
581 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 582 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
582 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 583 const FormatUrlType kFormatUrlOmitAll = |
583 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 584 kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | |
| 585 kFormatUrlOmitTrailingSlashOnBareHostname; |
584 | 586 |
585 base::string16 IDNToUnicode(const std::string& host, | 587 base::string16 FormatUrl(const GURL& url, |
586 const std::string& languages) { | 588 const std::string& languages, |
587 return IDNToUnicodeWithAdjustments(host, languages, NULL); | 589 FormatUrlTypes format_types, |
588 } | 590 net::UnescapeRule::Type unescape_rules, |
589 | 591 url::Parsed* new_parsed, |
590 std::string GetDirectoryListingEntry(const base::string16& name, | 592 size_t* prefix_end, |
591 const std::string& raw_bytes, | 593 size_t* offset_for_adjustment) { |
592 bool is_dir, | 594 std::vector<size_t> offsets; |
593 int64_t size, | 595 if (offset_for_adjustment) |
594 Time modified) { | 596 offsets.push_back(*offset_for_adjustment); |
595 std::string result; | 597 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
596 result.append("<script>addRow("); | 598 unescape_rules, new_parsed, prefix_end, &offsets); |
597 base::EscapeJSONString(name, true, &result); | 599 if (offset_for_adjustment) |
598 result.append(","); | 600 *offset_for_adjustment = offsets[0]; |
599 if (raw_bytes.empty()) { | |
600 base::EscapeJSONString(EscapePath(base::UTF16ToUTF8(name)), true, &result); | |
601 } else { | |
602 base::EscapeJSONString(EscapePath(raw_bytes), true, &result); | |
603 } | |
604 if (is_dir) { | |
605 result.append(",1,"); | |
606 } else { | |
607 result.append(",0,"); | |
608 } | |
609 | |
610 // Negative size means unknown or not applicable (e.g. directory). | |
611 base::string16 size_string; | |
612 if (size >= 0) | |
613 size_string = FormatBytesUnlocalized(size); | |
614 base::EscapeJSONString(size_string, true, &result); | |
615 | |
616 result.append(","); | |
617 | |
618 base::string16 modified_str; | |
619 // |modified| can be NULL in FTP listings. | |
620 if (!modified.is_null()) { | |
621 modified_str = base::TimeFormatShortDateAndTime(modified); | |
622 } | |
623 base::EscapeJSONString(modified_str, true, &result); | |
624 | |
625 result.append(");</script>\n"); | |
626 | |
627 return result; | 601 return result; |
628 } | 602 } |
629 | 603 |
630 void AppendFormattedHost(const GURL& url, | |
631 const std::string& languages, | |
632 base::string16* output) { | |
633 AppendFormattedComponent(url.possibly_invalid_spec(), | |
634 url.parsed_for_possibly_invalid_spec().host, | |
635 HostComponentTransform(languages), output, NULL, NULL); | |
636 } | |
637 | |
638 base::string16 FormatUrlWithOffsets( | 604 base::string16 FormatUrlWithOffsets( |
639 const GURL& url, | 605 const GURL& url, |
640 const std::string& languages, | 606 const std::string& languages, |
641 FormatUrlTypes format_types, | 607 FormatUrlTypes format_types, |
642 UnescapeRule::Type unescape_rules, | 608 net::UnescapeRule::Type unescape_rules, |
643 url::Parsed* new_parsed, | 609 url::Parsed* new_parsed, |
644 size_t* prefix_end, | 610 size_t* prefix_end, |
645 std::vector<size_t>* offsets_for_adjustment) { | 611 std::vector<size_t>* offsets_for_adjustment) { |
646 base::OffsetAdjuster::Adjustments adjustments; | 612 base::OffsetAdjuster::Adjustments adjustments; |
647 const base::string16& format_url_return_value = | 613 const base::string16& format_url_return_value = |
648 FormatUrlWithAdjustments(url, languages, format_types, unescape_rules, | 614 FormatUrlWithAdjustments(url, languages, format_types, unescape_rules, |
649 new_parsed, prefix_end, &adjustments); | 615 new_parsed, prefix_end, &adjustments); |
650 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); | 616 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); |
651 if (offsets_for_adjustment) { | 617 if (offsets_for_adjustment) { |
652 std::for_each( | 618 std::for_each( |
653 offsets_for_adjustment->begin(), | 619 offsets_for_adjustment->begin(), |
654 offsets_for_adjustment->end(), | 620 offsets_for_adjustment->end(), |
655 base::LimitOffset<std::string>(format_url_return_value.length())); | 621 base::LimitOffset<std::string>(format_url_return_value.length())); |
656 } | 622 } |
657 return format_url_return_value; | 623 return format_url_return_value; |
658 } | 624 } |
659 | 625 |
660 base::string16 FormatUrlWithAdjustments( | 626 base::string16 FormatUrlWithAdjustments( |
661 const GURL& url, | 627 const GURL& url, |
662 const std::string& languages, | 628 const std::string& languages, |
663 FormatUrlTypes format_types, | 629 FormatUrlTypes format_types, |
664 UnescapeRule::Type unescape_rules, | 630 net::UnescapeRule::Type unescape_rules, |
665 url::Parsed* new_parsed, | 631 url::Parsed* new_parsed, |
666 size_t* prefix_end, | 632 size_t* prefix_end, |
667 base::OffsetAdjuster::Adjustments* adjustments) { | 633 base::OffsetAdjuster::Adjustments* adjustments) { |
668 DCHECK(adjustments != NULL); | 634 DCHECK(adjustments != NULL); |
669 adjustments->clear(); | 635 adjustments->clear(); |
670 url::Parsed parsed_temp; | 636 url::Parsed parsed_temp; |
671 if (!new_parsed) | 637 if (!new_parsed) |
672 new_parsed = &parsed_temp; | 638 new_parsed = &parsed_temp; |
673 else | 639 else |
674 *new_parsed = url::Parsed(); | 640 *new_parsed = url::Parsed(); |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
781 if (parsed.query.is_valid()) | 747 if (parsed.query.is_valid()) |
782 url_string.push_back('?'); | 748 url_string.push_back('?'); |
783 AppendFormattedComponent(spec, parsed.query, | 749 AppendFormattedComponent(spec, parsed.query, |
784 NonHostComponentTransform(unescape_rules), | 750 NonHostComponentTransform(unescape_rules), |
785 &url_string, &new_parsed->query, adjustments); | 751 &url_string, &new_parsed->query, adjustments); |
786 | 752 |
787 // Ref. This is valid, unescaped UTF-8, so we can just convert. | 753 // Ref. This is valid, unescaped UTF-8, so we can just convert. |
788 if (parsed.ref.is_valid()) | 754 if (parsed.ref.is_valid()) |
789 url_string.push_back('#'); | 755 url_string.push_back('#'); |
790 AppendFormattedComponent(spec, parsed.ref, | 756 AppendFormattedComponent(spec, parsed.ref, |
791 NonHostComponentTransform(UnescapeRule::NONE), | 757 NonHostComponentTransform(net::UnescapeRule::NONE), |
792 &url_string, &new_parsed->ref, adjustments); | 758 &url_string, &new_parsed->ref, adjustments); |
793 | 759 |
794 // If we need to strip out http do it after the fact. | 760 // If we need to strip out http do it after the fact. |
795 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { | 761 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { |
796 const size_t kHTTPSize = arraysize(kHTTP) - 1; | 762 const size_t kHTTPSize = arraysize(kHTTP) - 1; |
797 url_string = url_string.substr(kHTTPSize); | 763 url_string = url_string.substr(kHTTPSize); |
798 // Because offsets in the |adjustments| are already calculated with respect | 764 // Because offsets in the |adjustments| are already calculated with respect |
799 // to the string with the http:// prefix in it, those offsets remain correct | 765 // to the string with the http:// prefix in it, those offsets remain correct |
800 // after stripping the prefix. The only thing necessary is to add an | 766 // after stripping the prefix. The only thing necessary is to add an |
801 // adjustment to reflect the stripped prefix. | 767 // adjustment to reflect the stripped prefix. |
802 adjustments->insert(adjustments->begin(), | 768 adjustments->insert(adjustments->begin(), |
803 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); | 769 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); |
804 | 770 |
805 if (prefix_end) | 771 if (prefix_end) |
806 *prefix_end -= kHTTPSize; | 772 *prefix_end -= kHTTPSize; |
807 | 773 |
808 // Adjust new_parsed. | 774 // Adjust new_parsed. |
809 DCHECK(new_parsed->scheme.is_valid()); | 775 DCHECK(new_parsed->scheme.is_valid()); |
810 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | 776 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. |
811 new_parsed->scheme.reset(); | 777 new_parsed->scheme.reset(); |
812 AdjustAllComponentsButScheme(delta, new_parsed); | 778 AdjustAllComponentsButScheme(delta, new_parsed); |
813 } | 779 } |
814 | 780 |
815 return url_string; | 781 return url_string; |
816 } | 782 } |
817 | 783 |
818 base::string16 FormatUrl(const GURL& url, | 784 bool CanStripTrailingSlash(const GURL& url) { |
819 const std::string& languages, | 785 // Omit the path only for standard, non-file URLs with nothing but "/" after |
820 FormatUrlTypes format_types, | 786 // the hostname. |
821 UnescapeRule::Type unescape_rules, | 787 return url.IsStandard() && !url.SchemeIsFile() && |
822 url::Parsed* new_parsed, | 788 !url.SchemeIsFileSystem() && !url.has_query() && !url.has_ref() |
823 size_t* prefix_end, | 789 && url.path() == "/"; |
824 size_t* offset_for_adjustment) { | |
825 Offsets offsets; | |
826 if (offset_for_adjustment) | |
827 offsets.push_back(*offset_for_adjustment); | |
828 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, | |
829 unescape_rules, new_parsed, prefix_end, &offsets); | |
830 if (offset_for_adjustment) | |
831 *offset_for_adjustment = offsets[0]; | |
832 return result; | |
833 } | 790 } |
834 | 791 |
835 } // namespace net | 792 void AppendFormattedHost(const GURL& url, |
| 793 const std::string& languages, |
| 794 base::string16* output) { |
| 795 AppendFormattedComponent( |
| 796 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, |
| 797 HostComponentTransform(languages), output, NULL, NULL); |
| 798 } |
| 799 |
| 800 base::string16 IDNToUnicode(const std::string& host, |
| 801 const std::string& languages) { |
| 802 return IDNToUnicodeWithAdjustments(host, languages, NULL); |
| 803 } |
| 804 |
| 805 |
| 806 } // namespace url_formatter |
OLD | NEW |