OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/uri.h" | 5 #include "src/uri.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/handles.h" | 8 #include "src/handles.h" |
9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
10 #include "src/list.h" | 10 #include "src/list.h" |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 } | 53 } |
54 } | 54 } |
55 | 55 |
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { | 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
57 buffer->Add('%'); | 57 buffer->Add('%'); |
58 buffer->Add(HexCharOfValue(octet >> 4)); | 58 buffer->Add(HexCharOfValue(octet >> 4)); |
59 buffer->Add(HexCharOfValue(octet & 0x0F)); | 59 buffer->Add(HexCharOfValue(octet & 0x0F)); |
60 } | 60 } |
61 | 61 |
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { | 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
63 uint8_t x = (c >> 12) & 0xF; | 63 char s[4]; |
64 uint8_t y = (c >> 6) & 63; | 64 int number_of_bytes; |
65 uint8_t z = c & 63; | 65 number_of_bytes = |
66 if (c <= 0x007F) { | 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
67 AddHexEncodedToBuffer(c, buffer); | 67 for (int k = 0; k < number_of_bytes; k++) { |
68 } else if (c <= 0x07FF) { | 68 AddHexEncodedToBuffer(s[k], buffer); |
69 AddHexEncodedToBuffer(y + 192, buffer); | |
70 AddHexEncodedToBuffer(z + 128, buffer); | |
71 } else { | |
72 AddHexEncodedToBuffer(x + 224, buffer); | |
73 AddHexEncodedToBuffer(y + 128, buffer); | |
74 AddHexEncodedToBuffer(z + 128, buffer); | |
75 } | 69 } |
76 } | 70 } |
77 | 71 |
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { | 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; | 73 char s[4]; |
80 uint8_t w = (cc1 >> 2) & 0xF; | 74 int number_of_bytes = |
81 uint8_t x = cc1 & 3; | 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
82 uint8_t y = (cc2 >> 6) & 0xF; | 76 unibrow::Utf16::kNoPreviousCharacter, false); |
83 uint8_t z = cc2 & 63; | 77 for (int k = 0; k < number_of_bytes; k++) { |
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); | 78 AddHexEncodedToBuffer(s[k], buffer); |
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); | 79 } |
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); | |
87 AddHexEncodedToBuffer(z + 128, buffer); | |
88 } | 80 } |
89 | 81 |
90 } // anonymous namespace | 82 } // anonymous namespace |
91 | 83 |
92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { | 84 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { |
93 uri = String::Flatten(uri); | 85 uri = String::Flatten(uri); |
94 int uri_length = uri->length(); | 86 int uri_length = uri->length(); |
95 List<uint8_t> buffer(uri_length); | 87 List<uint8_t> buffer(uri_length); |
96 | 88 |
97 { | 89 { |
(...skipping 26 matching lines...) Expand all Loading... |
124 } | 116 } |
125 } | 117 } |
126 | 118 |
127 Handle<String> result; | 119 Handle<String> result; |
128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 120 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
129 isolate, result, | 121 isolate, result, |
130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); | 122 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); |
131 return *result; | 123 return *result; |
132 } | 124 } |
133 | 125 |
| 126 namespace { // anonymous namespace for DecodeURI helper functions |
| 127 |
| 128 bool IsReservedPredicate(uc16 c) { |
| 129 switch (c) { |
| 130 case '#': |
| 131 case '$': |
| 132 case '&': |
| 133 case '+': |
| 134 case ',': |
| 135 case '/': |
| 136 case ':': |
| 137 case ';': |
| 138 case '=': |
| 139 case '?': |
| 140 case '@': |
| 141 return true; |
| 142 default: |
| 143 return false; |
| 144 } |
| 145 } |
| 146 |
| 147 bool IsRepalcementCharacter(List<uint8_t>* octets) { |
| 148 // 0xFFFD is %ef%bf%bd |
| 149 if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf || |
| 150 octets->at(2) != 0xbd) { |
| 151 return false; |
| 152 } |
| 153 return true; |
| 154 } |
| 155 |
| 156 bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) { |
| 157 size_t cursor = 0; |
| 158 uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(), |
| 159 octets->length(), &cursor); |
| 160 // kBadChar is the Replacement Character, which is the decoding of |
| 161 // valid input %ef%bf%bd |
| 162 if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) { |
| 163 return false; |
| 164 } |
| 165 |
| 166 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 167 two_byte_buffer->Add(value); |
| 168 } else { |
| 169 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value)); |
| 170 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value)); |
| 171 } |
| 172 return true; |
| 173 } |
| 174 |
| 175 bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) { |
| 176 char high = HexValue(uri_content->Get(k + 1)); |
| 177 char low = HexValue(uri_content->Get(k + 2)); |
| 178 if (high < 0 || low < 0) { |
| 179 return false; |
| 180 } |
| 181 decoded = (high << 4) | low; |
| 182 return true; |
| 183 } |
| 184 |
| 185 template <typename T> |
| 186 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k, |
| 187 bool is_uri, List<T>* buffer) { |
| 188 if (is_uri && IsReservedPredicate(decoded)) { |
| 189 buffer->Add('%'); |
| 190 buffer->Add(uri_content->Get(k + 1)); |
| 191 buffer->Add(uri_content->Get(k + 2)); |
| 192 } else { |
| 193 buffer->Add(decoded); |
| 194 } |
| 195 } |
| 196 |
| 197 bool IntoTwoByte(int index, bool is_uri, int uri_length, |
| 198 String::FlatContent* uri_content, |
| 199 List<uc16>* two_byte_buffer) { |
| 200 for (int k = index; k < uri_length; k++) { |
| 201 uc16 code = uri_content->Get(k); |
| 202 if (code == '%') { |
| 203 uc16 decoded; |
| 204 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) { |
| 205 return false; |
| 206 } |
| 207 k += 2; |
| 208 if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| 209 int n = 0; |
| 210 while (((decoded << ++n) & 0x80) != 0) { |
| 211 } |
| 212 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) { |
| 213 return false; |
| 214 } |
| 215 List<uint8_t> octets; |
| 216 octets.Add(decoded); |
| 217 |
| 218 for (int i = 1; i < n; i++) { |
| 219 uc16 decodedTrail; |
| 220 |
| 221 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length || |
| 222 !TwoDigitHex(decodedTrail, k, uri_content)) { |
| 223 return false; |
| 224 } |
| 225 k += 2; |
| 226 octets.Add(decodedTrail); |
| 227 } |
| 228 |
| 229 if (!DecodeOctets(&octets, two_byte_buffer)) { |
| 230 return false; |
| 231 } |
| 232 } else { |
| 233 AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer); |
| 234 } |
| 235 } else { |
| 236 two_byte_buffer->Add(code); |
| 237 } |
| 238 } |
| 239 return true; |
| 240 } |
| 241 |
| 242 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, |
| 243 List<uint8_t>* one_byte_buffer, |
| 244 List<uc16>* two_byte_buffer) { |
| 245 DisallowHeapAllocation no_gc; |
| 246 String::FlatContent uri_content = uri->GetFlatContent(); |
| 247 |
| 248 int uri_length = uri->length(); |
| 249 for (int k = 0; k < uri_length; k++) { |
| 250 uc16 code = uri_content.Get(k); |
| 251 if (code == '%') { |
| 252 uc16 decoded; |
| 253 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) { |
| 254 return false; |
| 255 } |
| 256 |
| 257 if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| 258 return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| 259 two_byte_buffer); |
| 260 } |
| 261 |
| 262 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); |
| 263 k += 2; |
| 264 } else { |
| 265 if (code > unibrow::Utf8::kMaxOneByteChar) { |
| 266 return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| 267 two_byte_buffer); |
| 268 } |
| 269 one_byte_buffer->Add(code); |
| 270 } |
| 271 } |
| 272 return true; |
| 273 } |
| 274 |
| 275 } // anonymous namespace |
| 276 |
| 277 Object* Uri::Decode(Isolate* isolate, Handle<String> uri, bool is_uri) { |
| 278 uri = String::Flatten(uri); |
| 279 List<uint8_t> one_byte_buffer; |
| 280 List<uc16> two_byte_buffer; |
| 281 |
| 282 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { |
| 283 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); |
| 284 } |
| 285 |
| 286 Handle<String> left = isolate->factory()->InternalizeOneByteString( |
| 287 one_byte_buffer.ToConstVector()); |
| 288 |
| 289 Handle<String> right = isolate->factory()->InternalizeTwoByteString( |
| 290 two_byte_buffer.ToConstVector()); |
| 291 |
| 292 Handle<String> result; |
| 293 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 294 isolate, result, isolate->factory()->NewConsString(left, right)); |
| 295 |
| 296 return *result; |
| 297 } |
| 298 |
134 } // namespace internal | 299 } // namespace internal |
135 } // namespace v8 | 300 } // namespace v8 |
OLD | NEW |