Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/uri.h" | 5 #include "src/uri.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/handles.h" | 8 #include "src/handles.h" |
| 9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
| 10 #include "src/list.h" | 10 #include "src/list.h" |
| (...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 53 } | 53 } |
| 54 } | 54 } |
| 55 | 55 |
| 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { | 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
| 57 buffer->Add('%'); | 57 buffer->Add('%'); |
| 58 buffer->Add(HexCharOfValue(octet >> 4)); | 58 buffer->Add(HexCharOfValue(octet >> 4)); |
| 59 buffer->Add(HexCharOfValue(octet & 0x0F)); | 59 buffer->Add(HexCharOfValue(octet & 0x0F)); |
| 60 } | 60 } |
| 61 | 61 |
| 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { | 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
| 63 uint8_t x = (c >> 12) & 0xF; | 63 char s[4]; |
| 64 uint8_t y = (c >> 6) & 63; | 64 int number_of_bytes; |
| 65 uint8_t z = c & 63; | 65 number_of_bytes = |
| 66 if (c <= 0x007F) { | 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
| 67 AddHexEncodedToBuffer(c, buffer); | 67 for (int k = 0; k < number_of_bytes; k++) { |
| 68 } else if (c <= 0x07FF) { | 68 AddHexEncodedToBuffer(s[k], buffer); |
| 69 AddHexEncodedToBuffer(y + 192, buffer); | |
| 70 AddHexEncodedToBuffer(z + 128, buffer); | |
| 71 } else { | |
| 72 AddHexEncodedToBuffer(x + 224, buffer); | |
| 73 AddHexEncodedToBuffer(y + 128, buffer); | |
| 74 AddHexEncodedToBuffer(z + 128, buffer); | |
| 75 } | 69 } |
| 76 } | 70 } |
| 77 | 71 |
| 78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { | 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
| 79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; | 73 char s[4]; |
| 80 uint8_t w = (cc1 >> 2) & 0xF; | 74 int number_of_bytes = |
| 81 uint8_t x = cc1 & 3; | 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
| 82 uint8_t y = (cc2 >> 6) & 0xF; | 76 unibrow::Utf16::kNoPreviousCharacter, false); |
| 83 uint8_t z = cc2 & 63; | 77 for (int k = 0; k < number_of_bytes; k++) { |
| 84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); | 78 AddHexEncodedToBuffer(s[k], buffer); |
| 85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); | 79 } |
| 86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); | |
| 87 AddHexEncodedToBuffer(z + 128, buffer); | |
| 88 } | 80 } |
| 89 | 81 |
| 90 } // anonymous namespace | 82 } // anonymous namespace |
| 91 | 83 |
| 92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { | 84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri, |
| 85 bool is_uri) { | |
| 93 uri = String::Flatten(uri); | 86 uri = String::Flatten(uri); |
| 94 int uri_length = uri->length(); | 87 int uri_length = uri->length(); |
| 95 List<uint8_t> buffer(uri_length); | 88 List<uint8_t> buffer(uri_length); |
| 96 | 89 |
| 97 { | 90 { |
| 98 DisallowHeapAllocation no_gc; | 91 DisallowHeapAllocation no_gc; |
| 99 String::FlatContent uri_content = uri->GetFlatContent(); | 92 String::FlatContent uri_content = uri->GetFlatContent(); |
| 100 | 93 |
| 101 for (int k = 0; k < uri_length; k++) { | 94 for (int k = 0; k < uri_length; k++) { |
| 102 uc16 cc1 = uri_content.Get(k); | 95 uc16 cc1 = uri_content.Get(k); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 113 if (IsUnescapePredicateInUriComponent(cc1) || | 106 if (IsUnescapePredicateInUriComponent(cc1) || |
| 114 (is_uri && IsUriSeparator(cc1))) { | 107 (is_uri && IsUriSeparator(cc1))) { |
| 115 buffer.Add(cc1); | 108 buffer.Add(cc1); |
| 116 } else { | 109 } else { |
| 117 EncodeSingle(cc1, &buffer); | 110 EncodeSingle(cc1, &buffer); |
| 118 } | 111 } |
| 119 continue; | 112 continue; |
| 120 } | 113 } |
| 121 | 114 |
| 122 AllowHeapAllocation allocate_error_and_return; | 115 AllowHeapAllocation allocate_error_and_return; |
| 123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); | 116 THROW_NEW_ERROR(isolate, NewURIError(), Object); |
| 124 } | 117 } |
| 125 } | 118 } |
| 126 | 119 |
| 127 Handle<String> result; | 120 Handle<String> result; |
| 128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 121 ASSIGN_RETURN_ON_EXCEPTION( |
| 129 isolate, result, | 122 isolate, result, |
| 130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); | 123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object); |
| 131 return *result; | 124 return result; |
| 125 } | |
| 126 | |
| 127 namespace { // anonymous namespace for DecodeURI helper functions | |
| 128 | |
| 129 bool IsReservedPredicate(uc16 c) { | |
| 130 switch (c) { | |
| 131 case '#': | |
| 132 case '$': | |
| 133 case '&': | |
| 134 case '+': | |
| 135 case ',': | |
| 136 case '/': | |
| 137 case ':': | |
| 138 case ';': | |
| 139 case '=': | |
| 140 case '?': | |
| 141 case '@': | |
| 142 return true; | |
| 143 default: | |
| 144 return false; | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 bool IsReplacementCharacter(const uint8_t* octets, int length) { | |
| 149 // The replacement character is at codepoint U+FFFD in the Unicode Specials | |
| 150 // table. Its UTF-8 encoding is 0xEF 0xBF 0xBD. | |
| 151 if (length != 3 || octets[0] != 0xef || octets[1] != 0xbf || | |
| 152 octets[2] != 0xbd) { | |
| 153 return false; | |
| 154 } | |
| 155 return true; | |
| 156 } | |
| 157 | |
| 158 bool DecodeOctets(const uint8_t* octets, int length, | |
| 159 List<uc16>* two_byte_buffer) { | |
| 160 size_t cursor = 0; | |
| 161 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor); | |
| 162 if (value == unibrow::Utf8::kBadChar && | |
| 163 !IsReplacementCharacter(octets, length)) { | |
| 164 return false; | |
| 165 } | |
| 166 | |
| 167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
| 168 two_byte_buffer->Add(value); | |
| 169 } else { | |
| 170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value)); | |
| 171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value)); | |
| 172 } | |
| 173 return true; | |
| 174 } | |
| 175 | |
| 176 bool TwoDigitHex(uc16* decoded, int index, String::FlatContent* uri_content) { | |
| 177 char high = HexValue(uri_content->Get(index + 1)); | |
| 178 char low = HexValue(uri_content->Get(index + 2)); | |
| 179 if (high < 0 || low < 0) { | |
| 180 return false; | |
| 181 } | |
| 182 *decoded = (high << 4) | low; | |
| 183 return true; | |
| 184 } | |
| 185 | |
| 186 template <typename T> | |
| 187 bool AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index, | |
| 188 bool is_uri, List<T>* buffer) { | |
| 189 if (is_uri && IsReservedPredicate(decoded)) { | |
| 190 buffer->Add('%'); | |
| 191 uc16 first = uri_content->Get(index + 1); | |
| 192 uc16 second = uri_content->Get(index + 2); | |
| 193 if (first > std::numeric_limits<T>::max() || | |
|
Yang
2016/05/23 11:24:59
Do you expect this to happen in some cases? Imo th
Franzi
2016/05/24 15:07:38
You're right. Can never happen. Done.
| |
| 194 second > std::numeric_limits<T>::max()) { | |
| 195 return false; | |
| 196 } | |
| 197 buffer->Add(first); | |
| 198 buffer->Add(second); | |
| 199 } else { | |
| 200 buffer->Add(decoded); | |
| 201 } | |
| 202 return true; | |
| 203 } | |
| 204 | |
| 205 bool IntoTwoByte(int index, bool is_uri, int uri_length, | |
| 206 String::FlatContent* uri_content, | |
| 207 List<uc16>* two_byte_buffer) { | |
| 208 for (int k = index; k < uri_length; k++) { | |
| 209 uc16 code = uri_content->Get(k); | |
| 210 if (code == '%') { | |
| 211 uc16 decoded; | |
| 212 if (k + 2 >= uri_length || !TwoDigitHex(&decoded, k, uri_content)) { | |
| 213 return false; | |
| 214 } | |
| 215 k += 2; | |
| 216 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
| 217 int n = 1; | |
| 218 while ((decoded << n) & 0x80) { | |
|
Yang
2016/05/23 11:24:59
Actually, you could merge this while loop with the
Franzi
2016/05/24 15:07:38
Merged. Also renamed n to number_of_continuation_b
| |
| 219 n++; | |
| 220 } | |
| 221 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) { | |
| 222 return false; | |
| 223 } | |
| 224 uint8_t octets[4]; | |
| 225 int octet_length = 0; | |
| 226 octets[octet_length++] = decoded; | |
| 227 | |
| 228 for (int i = 1; i < n; i++) { | |
| 229 uc16 decodedTrail; | |
| 230 | |
| 231 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length || | |
| 232 !TwoDigitHex(&decodedTrail, k, uri_content)) { | |
| 233 return false; | |
| 234 } | |
| 235 k += 2; | |
| 236 octets[octet_length++] = decodedTrail; | |
| 237 } | |
| 238 | |
| 239 if (!DecodeOctets(octets, octet_length, two_byte_buffer)) { | |
| 240 return false; | |
| 241 } | |
| 242 } else { | |
| 243 if (!AddToBuffer(decoded, uri_content, k - 2, is_uri, | |
| 244 two_byte_buffer)) { | |
| 245 return false; | |
| 246 } | |
| 247 } | |
| 248 } else { | |
| 249 two_byte_buffer->Add(code); | |
| 250 } | |
| 251 } | |
| 252 return true; | |
| 253 } | |
| 254 | |
| 255 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, | |
| 256 List<uint8_t>* one_byte_buffer, | |
| 257 List<uc16>* two_byte_buffer) { | |
| 258 DisallowHeapAllocation no_gc; | |
| 259 String::FlatContent uri_content = uri->GetFlatContent(); | |
| 260 | |
| 261 int uri_length = uri->length(); | |
| 262 for (int k = 0; k < uri_length; k++) { | |
| 263 uc16 code = uri_content.Get(k); | |
| 264 if (code == '%') { | |
| 265 uc16 decoded; | |
| 266 if (k + 2 >= uri_length || !TwoDigitHex(&decoded, k, &uri_content)) { | |
| 267 return false; | |
| 268 } | |
| 269 | |
| 270 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
| 271 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
| 272 two_byte_buffer); | |
| 273 } | |
| 274 | |
| 275 if (!AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer)) { | |
| 276 return false; | |
| 277 } | |
| 278 k += 2; | |
| 279 } else { | |
| 280 if (code > unibrow::Utf8::kMaxOneByteChar) { | |
| 281 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
| 282 two_byte_buffer); | |
| 283 } | |
| 284 one_byte_buffer->Add(code); | |
| 285 } | |
| 286 } | |
| 287 return true; | |
| 288 } | |
| 289 | |
| 290 } // anonymous namespace | |
| 291 | |
| 292 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri, | |
| 293 bool is_uri) { | |
| 294 uri = String::Flatten(uri); | |
| 295 List<uint8_t> one_byte_buffer; | |
| 296 List<uc16> two_byte_buffer; | |
| 297 | |
| 298 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { | |
| 299 THROW_NEW_ERROR(isolate, NewURIError(), Object); | |
| 300 } | |
| 301 | |
| 302 if (two_byte_buffer.is_empty()) { | |
| 303 Handle<SeqOneByteString> result; | |
| 304 | |
| 305 ASSIGN_RETURN_ON_EXCEPTION( | |
| 306 isolate, result, | |
| 307 isolate->factory()->NewRawOneByteString(one_byte_buffer.length()), | |
|
Yang
2016/05/23 11:24:59
You can use NewStringFromOneByte here.
Franzi
2016/05/24 15:07:38
Done.
| |
| 308 Object); | |
| 309 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(), | |
| 310 one_byte_buffer.length()); | |
| 311 return result; | |
| 312 } | |
| 313 | |
| 314 Handle<SeqTwoByteString> result; | |
| 315 ASSIGN_RETURN_ON_EXCEPTION( | |
| 316 isolate, result, isolate->factory()->NewRawTwoByteString( | |
| 317 one_byte_buffer.length() + two_byte_buffer.length()), | |
| 318 Object); | |
| 319 | |
| 320 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(), | |
| 321 one_byte_buffer.length()); | |
| 322 CopyChars(result->GetChars() + one_byte_buffer.length(), | |
| 323 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length()); | |
| 324 | |
| 325 return result; | |
| 132 } | 326 } |
| 133 | 327 |
| 134 } // namespace internal | 328 } // namespace internal |
| 135 } // namespace v8 | 329 } // namespace v8 |
| OLD | NEW |