Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/uri.h" | 5 #include "src/uri.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/handles.h" | 8 #include "src/handles.h" |
| 9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
| 10 #include "src/list.h" | 10 #include "src/list.h" |
| (...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 53 } | 53 } |
| 54 } | 54 } |
| 55 | 55 |
| 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { | 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
| 57 buffer->Add('%'); | 57 buffer->Add('%'); |
| 58 buffer->Add(HexCharOfValue(octet >> 4)); | 58 buffer->Add(HexCharOfValue(octet >> 4)); |
| 59 buffer->Add(HexCharOfValue(octet & 0x0F)); | 59 buffer->Add(HexCharOfValue(octet & 0x0F)); |
| 60 } | 60 } |
| 61 | 61 |
| 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { | 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
| 63 uint8_t x = (c >> 12) & 0xF; | 63 char s[4]; |
| 64 uint8_t y = (c >> 6) & 63; | 64 int number_of_bytes; |
| 65 uint8_t z = c & 63; | 65 number_of_bytes = |
| 66 if (c <= 0x007F) { | 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
| 67 AddHexEncodedToBuffer(c, buffer); | 67 for (int k = 0; k < number_of_bytes; k++) { |
| 68 } else if (c <= 0x07FF) { | 68 AddHexEncodedToBuffer(s[k], buffer); |
| 69 AddHexEncodedToBuffer(y + 192, buffer); | |
| 70 AddHexEncodedToBuffer(z + 128, buffer); | |
| 71 } else { | |
| 72 AddHexEncodedToBuffer(x + 224, buffer); | |
| 73 AddHexEncodedToBuffer(y + 128, buffer); | |
| 74 AddHexEncodedToBuffer(z + 128, buffer); | |
| 75 } | 69 } |
| 76 } | 70 } |
| 77 | 71 |
| 78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { | 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
| 79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; | 73 char s[4]; |
| 80 uint8_t w = (cc1 >> 2) & 0xF; | 74 int number_of_bytes = |
| 81 uint8_t x = cc1 & 3; | 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
| 82 uint8_t y = (cc2 >> 6) & 0xF; | 76 unibrow::Utf16::kNoPreviousCharacter, false); |
| 83 uint8_t z = cc2 & 63; | 77 for (int k = 0; k < number_of_bytes; k++) { |
| 84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); | 78 AddHexEncodedToBuffer(s[k], buffer); |
| 85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); | 79 } |
| 86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); | |
| 87 AddHexEncodedToBuffer(z + 128, buffer); | |
| 88 } | 80 } |
| 89 | 81 |
| 90 } // anonymous namespace | 82 } // anonymous namespace |
| 91 | 83 |
| 92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { | 84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri, |
| 85 bool is_uri) { | |
| 93 uri = String::Flatten(uri); | 86 uri = String::Flatten(uri); |
| 94 int uri_length = uri->length(); | 87 int uri_length = uri->length(); |
| 95 List<uint8_t> buffer(uri_length); | 88 List<uint8_t> buffer(uri_length); |
| 96 | 89 |
| 97 { | 90 { |
| 98 DisallowHeapAllocation no_gc; | 91 DisallowHeapAllocation no_gc; |
| 99 String::FlatContent uri_content = uri->GetFlatContent(); | 92 String::FlatContent uri_content = uri->GetFlatContent(); |
| 100 | 93 |
| 101 for (int k = 0; k < uri_length; k++) { | 94 for (int k = 0; k < uri_length; k++) { |
| 102 uc16 cc1 = uri_content.Get(k); | 95 uc16 cc1 = uri_content.Get(k); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 113 if (IsUnescapePredicateInUriComponent(cc1) || | 106 if (IsUnescapePredicateInUriComponent(cc1) || |
| 114 (is_uri && IsUriSeparator(cc1))) { | 107 (is_uri && IsUriSeparator(cc1))) { |
| 115 buffer.Add(cc1); | 108 buffer.Add(cc1); |
| 116 } else { | 109 } else { |
| 117 EncodeSingle(cc1, &buffer); | 110 EncodeSingle(cc1, &buffer); |
| 118 } | 111 } |
| 119 continue; | 112 continue; |
| 120 } | 113 } |
| 121 | 114 |
| 122 AllowHeapAllocation allocate_error_and_return; | 115 AllowHeapAllocation allocate_error_and_return; |
| 123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); | 116 THROW_NEW_ERROR(isolate, NewURIError(), Object); |
| 124 } | 117 } |
| 125 } | 118 } |
| 126 | 119 |
| 127 Handle<String> result; | 120 Handle<String> result; |
| 128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 121 ASSIGN_RETURN_ON_EXCEPTION( |
| 129 isolate, result, | 122 isolate, result, |
| 130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); | 123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object); |
| 131 return *result; | 124 return result; |
| 125 } | |
| 126 | |
| 127 namespace { // anonymous namespace for DecodeURI helper functions | |
| 128 | |
| 129 bool IsReservedPredicate(uc16 c) { | |
| 130 switch (c) { | |
| 131 case '#': | |
| 132 case '$': | |
| 133 case '&': | |
| 134 case '+': | |
| 135 case ',': | |
| 136 case '/': | |
| 137 case ':': | |
| 138 case ';': | |
| 139 case '=': | |
| 140 case '?': | |
| 141 case '@': | |
| 142 return true; | |
| 143 default: | |
| 144 return false; | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 bool IsRepalcementCharacter(List<uint8_t>* octets) { | |
|
Yang
2016/05/23 06:44:32
typo.
Franzi
2016/05/23 08:55:57
Done.
| |
| 149 // 0xFFFD is %ef%bf%bd | |
|
Yang
2016/05/23 06:44:32
What does this comment mean?
Franzi
2016/05/23 08:55:57
Reworded the comment to clarify why we check for t
| |
| 150 if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf || | |
| 151 octets->at(2) != 0xbd) { | |
| 152 return false; | |
| 153 } | |
| 154 return true; | |
| 155 } | |
| 156 | |
| 157 bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) { | |
| 158 size_t cursor = 0; | |
| 159 uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(), | |
| 160 octets->length(), &cursor); | |
| 161 // kBadChar is the Replacement Character, which is the decoding of | |
| 162 // valid input %ef%bf%bd | |
| 163 if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) { | |
| 164 return false; | |
| 165 } | |
| 166 | |
| 167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
| 168 two_byte_buffer->Add(value); | |
| 169 } else { | |
| 170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value)); | |
| 171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value)); | |
| 172 } | |
| 173 return true; | |
| 174 } | |
| 175 | |
| 176 bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) { | |
|
Yang
2016/05/23 06:44:31
Can we use uc16* as argument type? That way it's e
Yang
2016/05/23 06:44:32
can we call the second argument "index" or somethi
Franzi
2016/05/23 08:55:57
Done.
Franzi
2016/05/23 08:55:57
Done.
| |
| 177 char high = HexValue(uri_content->Get(k + 1)); | |
|
Yang
2016/05/23 06:44:32
FlatContent::Get returns a uc16. Casting that to s
Franzi
2016/05/23 08:55:57
Not sure I understand the comment. HexValue takes
Yang
2016/05/23 11:24:59
Ah I see. I misunderstood. Nevermind this comment.
| |
| 178 char low = HexValue(uri_content->Get(k + 2)); | |
| 179 if (high < 0 || low < 0) { | |
| 180 return false; | |
| 181 } | |
| 182 decoded = (high << 4) | low; | |
| 183 return true; | |
| 184 } | |
| 185 | |
| 186 template <typename T> | |
| 187 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k, | |
|
Yang
2016/05/23 06:44:32
same here, "index" instead of "k".
Franzi
2016/05/23 08:55:57
Done.
| |
| 188 bool is_uri, List<T>* buffer) { | |
| 189 if (is_uri && IsReservedPredicate(decoded)) { | |
| 190 buffer->Add('%'); | |
| 191 buffer->Add(uri_content->Get(k + 1)); | |
| 192 buffer->Add(uri_content->Get(k + 2)); | |
|
Yang
2016/05/23 06:44:31
Can we have a safeguard here that we don't have im
Franzi
2016/05/23 08:55:57
Done. Throwing exception if uri_content->Get() is
| |
| 193 } else { | |
| 194 buffer->Add(decoded); | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 bool IntoTwoByte(int index, bool is_uri, int uri_length, | |
| 199 String::FlatContent* uri_content, | |
| 200 List<uc16>* two_byte_buffer) { | |
| 201 for (int k = index; k < uri_length; k++) { | |
| 202 uc16 code = uri_content->Get(k); | |
| 203 if (code == '%') { | |
| 204 uc16 decoded; | |
| 205 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) { | |
| 206 return false; | |
| 207 } | |
| 208 k += 2; | |
| 209 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
| 210 int n = 0; | |
| 211 while (((decoded << ++n) & 0x80) != 0) { | |
|
Yang
2016/05/23 06:44:31
Can we have this as
do {
n++;
} while ((decode
Franzi
2016/05/23 08:55:57
Changed it to a simple while loop:
int n = 1;
| |
| 212 } | |
| 213 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) { | |
| 214 return false; | |
| 215 } | |
| 216 List<uint8_t> octets; | |
|
Yang
2016/05/23 06:44:31
octets will at most have the length 4, right? Can
Franzi
2016/05/23 08:55:57
Done.
| |
| 217 octets.Add(decoded); | |
| 218 | |
| 219 for (int i = 1; i < n; i++) { | |
| 220 uc16 decodedTrail; | |
| 221 | |
| 222 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length || | |
| 223 !TwoDigitHex(decodedTrail, k, uri_content)) { | |
| 224 return false; | |
| 225 } | |
| 226 k += 2; | |
| 227 octets.Add(decodedTrail); | |
| 228 } | |
| 229 | |
| 230 if (!DecodeOctets(&octets, two_byte_buffer)) { | |
| 231 return false; | |
| 232 } | |
| 233 } else { | |
| 234 AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer); | |
| 235 } | |
| 236 } else { | |
| 237 two_byte_buffer->Add(code); | |
| 238 } | |
| 239 } | |
| 240 return true; | |
| 241 } | |
| 242 | |
| 243 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, | |
| 244 List<uint8_t>* one_byte_buffer, | |
| 245 List<uc16>* two_byte_buffer) { | |
| 246 DisallowHeapAllocation no_gc; | |
| 247 String::FlatContent uri_content = uri->GetFlatContent(); | |
| 248 | |
| 249 int uri_length = uri->length(); | |
| 250 for (int k = 0; k < uri_length; k++) { | |
| 251 uc16 code = uri_content.Get(k); | |
| 252 if (code == '%') { | |
| 253 uc16 decoded; | |
| 254 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) { | |
| 255 return false; | |
| 256 } | |
| 257 | |
| 258 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
| 259 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
| 260 two_byte_buffer); | |
| 261 } | |
| 262 | |
| 263 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); | |
| 264 k += 2; | |
| 265 } else { | |
| 266 if (code > unibrow::Utf8::kMaxOneByteChar) { | |
| 267 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
| 268 two_byte_buffer); | |
| 269 } | |
| 270 one_byte_buffer->Add(code); | |
| 271 } | |
| 272 } | |
| 273 return true; | |
| 274 } | |
| 275 | |
| 276 } // anonymous namespace | |
| 277 | |
| 278 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri, | |
| 279 bool is_uri) { | |
| 280 uri = String::Flatten(uri); | |
| 281 List<uint8_t> one_byte_buffer; | |
| 282 List<uc16> two_byte_buffer; | |
| 283 | |
| 284 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { | |
| 285 THROW_NEW_ERROR(isolate, NewURIError(), Object); | |
| 286 } | |
| 287 | |
| 288 Handle<String> left = isolate->factory()->InternalizeOneByteString( | |
| 289 one_byte_buffer.ToConstVector()); | |
| 290 | |
| 291 Handle<String> right = isolate->factory()->InternalizeTwoByteString( | |
| 292 two_byte_buffer.ToConstVector()); | |
| 293 | |
| 294 Handle<String> result; | |
| 295 ASSIGN_RETURN_ON_EXCEPTION( | |
| 296 isolate, result, isolate->factory()->NewConsString(left, right), Object); | |
|
Yang
2016/05/23 06:44:32
Since we are going to copy from list into the heap
Franzi
2016/05/23 08:55:57
Returning sequential one- or two-byte string.
| |
| 297 | |
| 298 return result; | |
| 132 } | 299 } |
| 133 | 300 |
| 134 } // namespace internal | 301 } // namespace internal |
| 135 } // namespace v8 | 302 } // namespace v8 |
| OLD | NEW |