Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| (...skipping 79 matching lines...) | |
| 90 // @ A B C D E F G H I J K L M N O | 90 // @ A B C D E F G H I J K L M N O |
| 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 92 // P Q R S T U V W X Y Z [ \ ] ^ _ | 92 // P Q R S T U V W X Y Z [ \ ] ^ _ |
| 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, | 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
| 94 // ` a b c d e f g h i j k l m n o | 94 // ` a b c d e f g h i j k l m n o |
| 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 96 // p q r s t u v w x y z { | } ~ <NBSP> | 96 // p q r s t u v w x y z { | } ~ <NBSP> |
| 97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 | 97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
| 98 }; | 98 }; |
| 99 | 99 |
| 100 // Unescapes the escape sequence starting at index in escaped_text into unsigned | |
| 101 // char value. | |
|
Peter Kasting
2014/02/27 04:43:04
Nit:
// Attempts to unescape the sequence at |ind
Anuj
2014/02/27 19:38:45
Done.
| |
| 102 template<typename STR> | |
| 103 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, | |
| 104 int index, | |
|
Peter Kasting
2014/02/27 04:43:04
This should be a size_t.
Nit: Indenting (2 lines)
Anuj
2014/02/27 19:38:45
Done.
| |
| 105 unsigned char* value) { | |
| 106 const typename STR::value_type most_sig_digit( | |
|
Peter Kasting
2014/02/27 04:43:04
This function should also check whether escaped_te
Anuj
2014/02/27 19:38:45
Done.
| |
| 107 static_cast<typename STR::value_type>(escaped_text[index + 1])); | |
| 108 const typename STR::value_type least_sig_digit( | |
| 109 static_cast<typename STR::value_type>(escaped_text[index + 2])); | |
| 110 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { | |
| 111 *value = HexDigitToInt(most_sig_digit) * 16 + | |
| 112 HexDigitToInt(least_sig_digit); | |
| 113 return true; | |
| 114 } | |
| 115 return false; | |
| 116 } | |
| 117 | |
| 100 template<typename STR> | 118 template<typename STR> |
| 101 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, | 119 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
| 102 UnescapeRule::Type rules, | 120 UnescapeRule::Type rules, |
| 103 std::vector<size_t>* offsets_for_adjustment) { | 121 std::vector<size_t>* offsets_for_adjustment) { |
| 104 if (offsets_for_adjustment) { | 122 if (offsets_for_adjustment) { |
| 105 std::for_each(offsets_for_adjustment->begin(), | 123 std::for_each(offsets_for_adjustment->begin(), |
| 106 offsets_for_adjustment->end(), | 124 offsets_for_adjustment->end(), |
| 107 base::LimitOffset<STR>(escaped_text.length())); | 125 base::LimitOffset<STR>(escaped_text.length())); |
| 108 } | 126 } |
| 109 // Do not unescape anything, return the |escaped_text| text. | 127 // Do not unescape anything, return the |escaped_text| text. |
| 110 if (rules == UnescapeRule::NONE) | 128 if (rules == UnescapeRule::NONE) |
| 111 return escaped_text; | 129 return escaped_text; |
| 112 | 130 |
| 113 // The output of the unescaping is always smaller than the input, so we can | 131 // The output of the unescaping is always smaller than the input, so we can |
| 114 // reserve the input size to make sure we have enough buffer and don't have | 132 // reserve the input size to make sure we have enough buffer and don't have |
| 115 // to allocate in the loop below. | 133 // to allocate in the loop below. |
| 116 STR result; | 134 STR result; |
| 117 result.reserve(escaped_text.length()); | 135 result.reserve(escaped_text.length()); |
| 118 | 136 |
| 119 // Locations of adjusted text. | 137 // Locations of adjusted text. |
| 120 net::internal::AdjustEncodingOffset::Adjustments adjustments; | 138 net::internal::AdjustEncodingOffset::Adjustments adjustments; |
| 121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { | 139 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { |
| 122 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { | 140 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { |
| 123 // Non ASCII character, append as is. | 141 // Non ASCII character, append as is. |
| 124 result.push_back(escaped_text[i]); | 142 result.push_back(escaped_text[i]); |
| 125 continue; | 143 continue; |
| 126 } | 144 } |
| 127 | 145 |
| 128 char current_char = static_cast<char>(escaped_text[i]); | 146 char current_char = static_cast<char>(escaped_text[i]); |
| 129 if (current_char == '%' && i + 2 < max) { | 147 if (current_char == '%' && i + 2 < max) { |
|
Peter Kasting
2014/02/27 04:43:04
If you add the checks mentioned above, |max| can b
Anuj
2014/02/27 19:38:45
Done.
| |
| 130 const typename STR::value_type most_sig_digit( | 148 unsigned char value; |
| 131 static_cast<typename STR::value_type>(escaped_text[i + 1])); | 149 if (UnescapeUnsignedCharAtIndex(escaped_text, i, &value)) { |
| 132 const typename STR::value_type least_sig_digit( | 150 // As per http://tools.ietf.org/html/rfc3987#section-4.1, BiDi control |
| 133 static_cast<typename STR::value_type>(escaped_text[i + 2])); | 151 // characters are disallowed. The BiDi control characters in escaped |
| 134 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { | 152 // form are : |
| 135 unsigned char value = HexDigitToInt(most_sig_digit) * 16 + | 153 // kRightToLeftMark = "%E2%80%8F" |
| 136 HexDigitToInt(least_sig_digit); | 154 // kLeftToRightMark = "%E2%80%8E" |
| 155 // kLeftToRightEmbeddingMark = "%E2%80%AA" | |
| 156 // kRightToLeftEmbeddingMark = "%E2%80%AB" | |
| 157 // kPopDirectionalFormatting = "%E2%80%AC" | |
| 158 // kLeftToRightOverride = "%E2%80%AD" | |
| 159 // kRightToLeftOverride = "%E2%80%AE" | |
|
Peter Kasting
2014/02/27 04:43:04
Nit: Don't use kNames for things that are just com
Anuj
2014/02/27 19:38:45
Done.
| |
| 160 if (value == 0xE2 && i + 8 < max) { | |
|
Peter Kasting
2014/02/27 04:43:04
If you add the checks mentioned above, you can eli
Anuj
2014/02/27 19:38:45
Done.
| |
| 161 // Possible BiDi control character. | |
| 162 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &value); | |
| 163 if (value == 0x80) { | |
| 164 UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &value); | |
| 165 if (value == 0xAA || value == 0xAB || value == 0xAC || | |
| 166 value == 0xAD || value == 0xAE || value == 0x8E || | |
| 167 value == 0x8F) { | |
|
Peter Kasting
2014/02/27 04:43:04
Nit: Simpler:
if ((value == 0x8E) || (value == 0x
Anuj
2014/02/27 19:38:45
Done.
| |
| 168 result.append(escaped_text, i, 9); | |
| 169 i += 8; | |
| 170 continue; | |
| 171 } | |
| 172 } | |
| 173 // Restore value if BiDi control character not found. | |
|
Peter Kasting
2014/02/27 04:43:04
Prefer declaring a different temp to hold the seco
Anuj
2014/02/27 19:38:45
Done.
| |
| 174 value = 0xE2; | |
| 175 } | |
| 137 if (value >= 0x80 || // Unescape all high-bit characters. | 176 if (value >= 0x80 || // Unescape all high-bit characters. |
| 138 // For 7-bit characters, the lookup table tells us all valid chars. | 177 // For 7-bit characters, the lookup table tells us all valid chars. |
| 139 (kUrlUnescape[value] || | 178 (kUrlUnescape[value] || |
| 140 // ...and we allow some additional unescaping when flags are set. | 179 // ...and we allow some additional unescaping when flags are set. |
| 141 (value == ' ' && (rules & UnescapeRule::SPACES)) || | 180 (value == ' ' && (rules & UnescapeRule::SPACES)) || |
| 142 // Allow any of the prohibited but non-control characters when | 181 // Allow any of the prohibited but non-control characters when |
| 143 // we're doing "special" chars. | 182 // we're doing "special" chars. |
| 144 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || | 183 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || |
| 145 // Additionally allow control characters if requested. | 184 // Additionally allow control characters if requested. |
| 146 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { | 185 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { |
| (...skipping 238 matching lines...) | |
| 385 return; | 424 return; |
| 386 } | 425 } |
| 387 adjusted_offset -= 2; | 426 adjusted_offset -= 2; |
| 388 } | 427 } |
| 389 offset = adjusted_offset; | 428 offset = adjusted_offset; |
| 390 } | 429 } |
| 391 | 430 |
| 392 } // namespace internal | 431 } // namespace internal |
| 393 | 432 |
| 394 } // namespace net | 433 } // namespace net |
| OLD | NEW |