OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef NET_BASE_ESCAPE_H_ | 5 #ifndef NET_BASE_ESCAPE_H_ |
6 #define NET_BASE_ESCAPE_H_ | 6 #define NET_BASE_ESCAPE_H_ |
7 | 7 |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 | 9 |
10 #include <string> | 10 #include <string> |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
71 | 71 |
72 enum { | 72 enum { |
73 // Don't unescape anything at all. | 73 // Don't unescape anything at all. |
74 NONE = 0, | 74 NONE = 0, |
75 | 75 |
76 // Don't unescape anything special, but all normal unescaping will happen. | 76 // Don't unescape anything special, but all normal unescaping will happen. |
77 // This is a placeholder and can't be combined with other flags (since it's | 77 // This is a placeholder and can't be combined with other flags (since it's |
78 // just the absence of them). All other unescape rules imply "normal" in | 78 // just the absence of them). All other unescape rules imply "normal" in |
79 // addition to their special meaning. Things like escaped letters, digits, | 79 // addition to their special meaning. Things like escaped letters, digits, |
80 // and most symbols will get unescaped with this mode. | 80 // and most symbols will get unescaped with this mode. |
81 NORMAL = 1, | 81 NORMAL = 1 << 0, |
82 | 82 |
83 // Convert %20 to spaces. In some places where we're showing URLs, we may | 83 // Convert %20 to spaces. In some places where we're showing URLs, we may |
84 // want this. In places where the URL may be copied and pasted out, then | 84 // want this. In places where the URL may be copied and pasted out, then |
85 // you wouldn't want this since it might not be interpreted in one piece | 85 // you wouldn't want this since it might not be interpreted in one piece |
86 // by other applications. | 86 // by other applications. |
87 SPACES = 2, | 87 SPACES = 1 << 1, |
88 | |
89 // Unescapes '/' and '\\'. If these characters were unescaped, the resulting | |
90 // URL won't be the same as the source one. Moreover, they are dangerous to | |
91 // unescape in strings that will be used as file paths or names. This value | |
92 // should be used rarely, and only with extreme caution. | |
brettw
2016/02/22 23:33:24
I think it would be worth mentioning here that the
mmenke
2016/02/23 15:59:39
Done
| |
93 PATH_SEPARATORS = 1 << 2, | |
88 | 94 |
89 // Unescapes various characters that will change the meaning of URLs, | 95 // Unescapes various characters that will change the meaning of URLs, |
90 // including '%', '+', '&', '/', '#'. If we unescaped these characters, the | 96 // including '%', '+', '&', '#'. Does not unescape path separators. |
91 // resulting URL won't be the same as the source one. This flag is used when | 97 // If these characters were unescaped, the resulting URL won't be the same |
92 // generating final output like filenames for URLs where we won't be | 98 // as the source one. This flag is used when generating final output like |
93 // interpreting as a URL and want to do as much unescaping as possible. | 99 // filenames for URLs where we won't be interpreting as a URL and want to do |
94 URL_SPECIAL_CHARS = 4, | 100 // as much unescaping as possible. |
101 URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3, | |
102 | |
103 // A combination of URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS and | |
104 // PATH_SEPARATORS. Warnings about the use of PATH_SEPARATORS also apply | |
105 // here. | |
106 // TODO(mmenke): Audit all uses of this and replace with the above values, | |
107 // as needed. | |
108 URL_SPECIAL_CHARS = | |
109 PATH_SEPARATORS | URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS, | |
95 | 110 |
96 // Unescapes characters that can be used in spoofing attempts (such as LOCK) | 111 // Unescapes characters that can be used in spoofing attempts (such as LOCK) |
97 // and control characters (such as BiDi control characters and %01). This | 112 // and control characters (such as BiDi control characters and %01). This |
98 // INCLUDES NULLs. This is used for rare cases such as data: URL decoding | 113 // INCLUDES NULLs. This is used for rare cases such as data: URL decoding |
99 // where the result is binary data. | 114 // where the result is binary data. |
100 // | 115 // |
101 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the URL is going to be displayed | 116 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the URL is going to be displayed |
102 // in the UI for security reasons. | 117 // in the UI for security reasons. |
103 SPOOFING_AND_CONTROL_CHARS = 8, | 118 SPOOFING_AND_CONTROL_CHARS = 1 << 4, |
104 | 119 |
105 // URL queries use "+" for space. This flag controls that replacement. | 120 // URL queries use "+" for space. This flag controls that replacement. |
106 REPLACE_PLUS_WITH_SPACE = 16, | 121 REPLACE_PLUS_WITH_SPACE = 1 << 5, |
107 }; | 122 }; |
108 }; | 123 }; |
109 | 124 |
110 // Unescapes |escaped_text| and returns the result. | 125 // Unescapes |escaped_text| and returns the result. |
111 // Unescaping consists of looking for the exact pattern "%XX", where each X is | 126 // Unescaping consists of looking for the exact pattern "%XX", where each X is |
112 // a hex digit, and converting to the character with the numerical value of | 127 // a hex digit, and converting to the character with the numerical value of |
113 // those digits. Thus "i%20=%203%3b" unescapes to "i = 3;". | 128 // those digits. Thus "i%20=%203%3b" unescapes to "i = 3;". |
114 // | 129 // |
115 // Watch out: this doesn't necessarily result in the correct final result, | 130 // Watch out: this doesn't necessarily result in the correct final result, |
116 // because the encoding may be unknown. For example, the input might be ASCII, | 131 // because the encoding may be unknown. For example, the input might be ASCII, |
(...skipping 20 matching lines...) Expand all Loading... | |
137 UnescapeRule::Type rules, | 152 UnescapeRule::Type rules, |
138 base::OffsetAdjuster::Adjustments* adjustments); | 153 base::OffsetAdjuster::Adjustments* adjustments); |
139 | 154 |
140 // Unescapes the following ampersand character codes from |text|: | 155 // Unescapes the following ampersand character codes from |text|: |
141 // &lt; &gt; &amp; &quot; &#39; | 156 // &lt; &gt; &amp; &quot; &#39; |
142 NET_EXPORT base::string16 UnescapeForHTML(const base::string16& text); | 157 NET_EXPORT base::string16 UnescapeForHTML(const base::string16& text); |
143 | 158 |
144 } // namespace net | 159 } // namespace net |
145 | 160 |
146 #endif // NET_BASE_ESCAPE_H_ | 161 #endif // NET_BASE_ESCAPE_H_ |
OLD | NEW |