OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/uri.h" | 5 #include "src/uri.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/handles.h" | 8 #include "src/handles.h" |
9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
10 #include "src/list.h" | 10 #include "src/list.h" |
11 | 11 |
12 namespace v8 { | 12 namespace v8 { |
13 namespace internal { | 13 namespace internal { |
14 | 14 |
| 15 namespace { // anonymous namespace for DecodeURI helper functions |
| 16 bool IsReservedPredicate(uc16 c) { |
| 17 switch (c) { |
| 18 case '#': |
| 19 case '$': |
| 20 case '&': |
| 21 case '+': |
| 22 case ',': |
| 23 case '/': |
| 24 case ':': |
| 25 case ';': |
| 26 case '=': |
| 27 case '?': |
| 28 case '@': |
| 29 return true; |
| 30 default: |
| 31 return false; |
| 32 } |
| 33 } |
| 34 |
// Reports whether the |length| bytes starting at |octets| are exactly the
// UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER), i.e. 0xEF 0xBF 0xBD.
bool IsReplacementCharacter(const uint8_t* octets, int length) {
  // The length test comes first so that |octets| is only read when it holds
  // exactly three bytes.
  return length == 3 && octets[0] == 0xef && octets[1] == 0xbf &&
         octets[2] == 0xbd;
}
| 44 |
| 45 bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) { |
| 46 size_t cursor = 0; |
| 47 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor); |
| 48 if (value == unibrow::Utf8::kBadChar && |
| 49 !IsReplacementCharacter(octets, length)) { |
| 50 return false; |
| 51 } |
| 52 |
| 53 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 54 buffer->Add(value); |
| 55 } else { |
| 56 buffer->Add(unibrow::Utf16::LeadSurrogate(value)); |
| 57 buffer->Add(unibrow::Utf16::TrailSurrogate(value)); |
| 58 } |
| 59 return true; |
| 60 } |
| 61 |
| 62 bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) { |
| 63 char high = HexValue(uri_content->Get(index + 1)); |
| 64 char low = HexValue(uri_content->Get(index + 2)); |
| 65 if (high < 0 || low < 0) { |
| 66 return false; |
| 67 } |
| 68 *decoded = (high << 4) | low; |
| 69 return true; |
| 70 } |
| 71 |
| 72 template <typename T> |
| 73 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index, |
| 74 bool is_uri, List<T>* buffer) { |
| 75 if (is_uri && IsReservedPredicate(decoded)) { |
| 76 buffer->Add('%'); |
| 77 uc16 first = uri_content->Get(index + 1); |
| 78 uc16 second = uri_content->Get(index + 2); |
| 79 DCHECK_GT(std::numeric_limits<T>::max(), first); |
| 80 DCHECK_GT(std::numeric_limits<T>::max(), second); |
| 81 |
| 82 buffer->Add(first); |
| 83 buffer->Add(second); |
| 84 } else { |
| 85 buffer->Add(decoded); |
| 86 } |
| 87 } |
| 88 |
| 89 bool IntoTwoByte(int index, bool is_uri, int uri_length, |
| 90 String::FlatContent* uri_content, List<uc16>* buffer) { |
| 91 for (int k = index; k < uri_length; k++) { |
| 92 uc16 code = uri_content->Get(k); |
| 93 if (code == '%') { |
| 94 uc16 decoded; |
| 95 if (k + 2 >= uri_length || !TwoDigitHex(k, uri_content, &decoded)) { |
| 96 return false; |
| 97 } |
| 98 k += 2; |
| 99 if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| 100 uint8_t octets[unibrow::Utf8::kMaxEncodedSize]; |
| 101 octets[0] = decoded; |
| 102 |
| 103 int number_of_continuation_bytes = 0; |
| 104 while ((decoded << ++number_of_continuation_bytes) & 0x80) { |
| 105 if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) { |
| 106 return false; |
| 107 } |
| 108 |
| 109 uc16 continuation_byte; |
| 110 |
| 111 if (uri_content->Get(++k) != '%' || |
| 112 !TwoDigitHex(k, uri_content, &continuation_byte)) { |
| 113 return false; |
| 114 } |
| 115 k += 2; |
| 116 octets[number_of_continuation_bytes] = continuation_byte; |
| 117 } |
| 118 |
| 119 if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) { |
| 120 return false; |
| 121 } |
| 122 } else { |
| 123 AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer); |
| 124 } |
| 125 } else { |
| 126 buffer->Add(code); |
| 127 } |
| 128 } |
| 129 return true; |
| 130 } |
| 131 |
| 132 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, |
| 133 List<uint8_t>* one_byte_buffer, |
| 134 List<uc16>* two_byte_buffer) { |
| 135 DisallowHeapAllocation no_gc; |
| 136 String::FlatContent uri_content = uri->GetFlatContent(); |
| 137 |
| 138 int uri_length = uri->length(); |
| 139 for (int k = 0; k < uri_length; k++) { |
| 140 uc16 code = uri_content.Get(k); |
| 141 if (code == '%') { |
| 142 uc16 decoded; |
| 143 if (k + 2 >= uri_length || !TwoDigitHex(k, &uri_content, &decoded)) { |
| 144 return false; |
| 145 } |
| 146 |
| 147 if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| 148 return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| 149 two_byte_buffer); |
| 150 } |
| 151 |
| 152 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); |
| 153 k += 2; |
| 154 } else { |
| 155 if (code > unibrow::Utf8::kMaxOneByteChar) { |
| 156 return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| 157 two_byte_buffer); |
| 158 } |
| 159 one_byte_buffer->Add(code); |
| 160 } |
| 161 } |
| 162 return true; |
| 163 } |
| 164 |
| 165 } // anonymous namespace |
| 166 |
| 167 MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri, |
| 168 bool is_uri) { |
| 169 uri = String::Flatten(uri); |
| 170 List<uint8_t> one_byte_buffer; |
| 171 List<uc16> two_byte_buffer; |
| 172 |
| 173 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { |
| 174 THROW_NEW_ERROR(isolate, NewURIError(), String); |
| 175 } |
| 176 |
| 177 if (two_byte_buffer.is_empty()) { |
| 178 return isolate->factory()->NewStringFromOneByte( |
| 179 one_byte_buffer.ToConstVector()); |
| 180 } |
| 181 |
| 182 Handle<SeqTwoByteString> result; |
| 183 ASSIGN_RETURN_ON_EXCEPTION( |
| 184 isolate, result, isolate->factory()->NewRawTwoByteString( |
| 185 one_byte_buffer.length() + two_byte_buffer.length()), |
| 186 String); |
| 187 |
| 188 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(), |
| 189 one_byte_buffer.length()); |
| 190 CopyChars(result->GetChars() + one_byte_buffer.length(), |
| 191 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length()); |
| 192 |
| 193 return result; |
| 194 } |
| 195 |
15 namespace { // anonymous namespace for EncodeURI helper functions | 196 namespace { // anonymous namespace for EncodeURI helper functions |
16 bool IsUnescapePredicateInUriComponent(uc16 c) { | 197 bool IsUnescapePredicateInUriComponent(uc16 c) { |
17 if (IsAlphaNumeric(c)) { | 198 if (IsAlphaNumeric(c)) { |
18 return true; | 199 return true; |
19 } | 200 } |
20 | 201 |
21 switch (c) { | 202 switch (c) { |
22 case '!': | 203 case '!': |
23 case '\'': | 204 case '\'': |
24 case '(': | 205 case '(': |
(...skipping 28 matching lines...) Expand all Loading... |
53 } | 234 } |
54 } | 235 } |
55 | 236 |
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { | 237 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
57 buffer->Add('%'); | 238 buffer->Add('%'); |
58 buffer->Add(HexCharOfValue(octet >> 4)); | 239 buffer->Add(HexCharOfValue(octet >> 4)); |
59 buffer->Add(HexCharOfValue(octet & 0x0F)); | 240 buffer->Add(HexCharOfValue(octet & 0x0F)); |
60 } | 241 } |
61 | 242 |
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { | 243 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
63 uint8_t x = (c >> 12) & 0xF; | 244 char s[4]; |
64 uint8_t y = (c >> 6) & 63; | 245 int number_of_bytes; |
65 uint8_t z = c & 63; | 246 number_of_bytes = |
66 if (c <= 0x007F) { | 247 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
67 AddHexEncodedToBuffer(c, buffer); | 248 for (int k = 0; k < number_of_bytes; k++) { |
68 } else if (c <= 0x07FF) { | 249 AddHexEncodedToBuffer(s[k], buffer); |
69 AddHexEncodedToBuffer(y + 192, buffer); | |
70 AddHexEncodedToBuffer(z + 128, buffer); | |
71 } else { | |
72 AddHexEncodedToBuffer(x + 224, buffer); | |
73 AddHexEncodedToBuffer(y + 128, buffer); | |
74 AddHexEncodedToBuffer(z + 128, buffer); | |
75 } | 250 } |
76 } | 251 } |
77 | 252 |
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { | 253 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; | 254 char s[4]; |
80 uint8_t w = (cc1 >> 2) & 0xF; | 255 int number_of_bytes = |
81 uint8_t x = cc1 & 3; | 256 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
82 uint8_t y = (cc2 >> 6) & 0xF; | 257 unibrow::Utf16::kNoPreviousCharacter, false); |
83 uint8_t z = cc2 & 63; | 258 for (int k = 0; k < number_of_bytes; k++) { |
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); | 259 AddHexEncodedToBuffer(s[k], buffer); |
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); | 260 } |
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); | |
87 AddHexEncodedToBuffer(z + 128, buffer); | |
88 } | 261 } |
89 | 262 |
90 } // anonymous namespace | 263 } // anonymous namespace |
91 | 264 |
92 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri, | 265 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri, |
93 bool is_uri) { | 266 bool is_uri) { |
94 uri = String::Flatten(uri); | 267 uri = String::Flatten(uri); |
95 int uri_length = uri->length(); | 268 int uri_length = uri->length(); |
96 List<uint8_t> buffer(uri_length); | 269 List<uint8_t> buffer(uri_length); |
97 | 270 |
(...skipping 25 matching lines...) Expand all Loading... |
123 AllowHeapAllocation allocate_error_and_return; | 296 AllowHeapAllocation allocate_error_and_return; |
124 THROW_NEW_ERROR(isolate, NewURIError(), String); | 297 THROW_NEW_ERROR(isolate, NewURIError(), String); |
125 } | 298 } |
126 } | 299 } |
127 | 300 |
128 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()); | 301 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()); |
129 } | 302 } |
130 | 303 |
131 } // namespace internal | 304 } // namespace internal |
132 } // namespace v8 | 305 } // namespace v8 |
OLD | NEW |