OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "net/base/escape.h" | 7 #include "net/base/escape.h" |
8 | 8 |
9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
64 return escaped; | 64 return escaped; |
65 } | 65 } |
66 | 66 |
67 // Contains nonzero when the corresponding character is unescapable for normal | 67 // Contains nonzero when the corresponding character is unescapable for normal |
68 // URLs. These characters are the ones that may change the parsing of a URL, so | 68 // URLs. These characters are the ones that may change the parsing of a URL, so |
69 // we don't want to unescape them sometimes. In many cases we won't want to | 69 // we don't want to unescape them sometimes. In many cases we won't want to |
70 // unescape spaces, but that is controlled by parameters to Unescape*. | 70 // unescape spaces, but that is controlled by parameters to Unescape*. |
71 // | 71 // |
72 // The basic rule is that we can't unescape anything that would change parsing | 72 // The basic rule is that we can't unescape anything that would change parsing |
73 // like # or ?. We also can't unescape &, =, or + since that could be part of a | 73 // like # or ?. We also can't unescape &, =, or + since that could be part of a |
74 // query and that could change the server's parsing of the query. | 74 // query and that could change the server's parsing of the query. Nor can we |
| 75 // unescape \ since googleurl will convert it to a /. |
75 // | 76 // |
76 // Lastly, we can't unescape anything that doesn't have a canonical | 77 // Lastly, we can't unescape anything that doesn't have a canonical |
77 // representation in a URL. This means that unescaping will change the URL, and | 78 // representation in a URL. This means that unescaping will change the URL, and |
78 // you could get different behavior if you copy and paste the URL, or press | 79 // you could get different behavior if you copy and paste the URL, or press |
79 // enter in the URL bar. The list of characters that fall into this category | 80 // enter in the URL bar. The list of characters that fall into this category |
80 // are the ones labeled PASS (allow either escaped or unescaped) in the big | 81 // are the ones labeled PASS (allow either escaped or unescaped) in the big |
81 // lookup table at the top of googleurl/src/url_canon_path.cc | 82 // lookup table at the top of googleurl/src/url_canon_path.cc |
82 const char kUrlUnescape[128] = { | 83 const char kUrlUnescape[128] = { |
83 // NUL and the other control characters, 0x00-0x1F (all entries zero). | 84 // NUL and the other control characters, 0x00-0x1F (all entries zero). |
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
86 // ' ' ! " # $ % & ' ( ) * + , - . / | 87 // ' ' ! " # $ % & ' ( ) * + , - . / |
87 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, | 88 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, |
88 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? | 89 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? |
89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, | 90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, |
90 // @ A B C D E F G H I J K L M N O | 91 // @ A B C D E F G H I J K L M N O |
91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 92 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
92 // P Q R S T U V W X Y Z [ \ ] ^ _ | 93 // P Q R S T U V W X Y Z [ \ ] ^ _ |
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, | 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, |
94 // ` a b c d e f g h i j k l m n o | 95 // ` a b c d e f g h i j k l m n o |
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
96 // p q r s t u v w x y z { | } ~ <DEL> | 97 // p q r s t u v w x y z { | } ~ <DEL> |
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 98 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 |
98 }; | 99 }; |
99 | 100 |
100 template<typename STR> | 101 template<typename STR> |
101 STR UnescapeURLImpl(const STR& escaped_text, | 102 STR UnescapeURLImpl(const STR& escaped_text, |
102 UnescapeRule::Type rules, | 103 UnescapeRule::Type rules, |
103 size_t* offset_for_adjustment) { | 104 size_t* offset_for_adjustment) { |
(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
356 if (text.find(ampersand_chars[i], index) == index) { | 357 if (text.find(ampersand_chars[i], index) == index) { |
357 text.replace(iter, iter + ampersand_chars[i].length(), | 358 text.replace(iter, iter + ampersand_chars[i].length(), |
358 1, kEscapeToChars[i].replacement); | 359 1, kEscapeToChars[i].replacement); |
359 break; | 360 break; |
360 } | 361 } |
361 } | 362 } |
362 } | 363 } |
363 } | 364 } |
364 return text; | 365 return text; |
365 } | 366 } |
OLD | NEW |