OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/uri.h" | 5 #include "src/uri.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/handles.h" | 8 #include "src/handles.h" |
9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
10 #include "src/list.h" | 10 #include "src/list.h" |
(...skipping 42 matching lines...) | |
53 } | 53 } |
54 } | 54 } |
55 | 55 |
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { | 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
57 buffer->Add('%'); | 57 buffer->Add('%'); |
58 buffer->Add(HexCharOfValue(octet >> 4)); | 58 buffer->Add(HexCharOfValue(octet >> 4)); |
59 buffer->Add(HexCharOfValue(octet & 0x0F)); | 59 buffer->Add(HexCharOfValue(octet & 0x0F)); |
60 } | 60 } |
61 | 61 |
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { | 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
63 uint8_t x = (c >> 12) & 0xF; | 63 char s[4]; |
64 uint8_t y = (c >> 6) & 63; | 64 int number_of_bytes; |
65 uint8_t z = c & 63; | 65 number_of_bytes = |
66 if (c <= 0x007F) { | 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
67 AddHexEncodedToBuffer(c, buffer); | 67 for (int k = 0; k < number_of_bytes; k++) { |
68 } else if (c <= 0x07FF) { | 68 AddHexEncodedToBuffer(s[k], buffer); |
69 AddHexEncodedToBuffer(y + 192, buffer); | |
70 AddHexEncodedToBuffer(z + 128, buffer); | |
71 } else { | |
72 AddHexEncodedToBuffer(x + 224, buffer); | |
73 AddHexEncodedToBuffer(y + 128, buffer); | |
74 AddHexEncodedToBuffer(z + 128, buffer); | |
75 } | 69 } |
76 } | 70 } |
77 | 71 |
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { | 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; | 73 char s[4]; |
80 uint8_t w = (cc1 >> 2) & 0xF; | 74 int number_of_bytes = |
81 uint8_t x = cc1 & 3; | 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
82 uint8_t y = (cc2 >> 6) & 0xF; | 76 unibrow::Utf16::kNoPreviousCharacter, false); |
83 uint8_t z = cc2 & 63; | 77 for (int k = 0; k < number_of_bytes; k++) { |
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); | 78 AddHexEncodedToBuffer(s[k], buffer); |
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); | 79 } |
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); | |
87 AddHexEncodedToBuffer(z + 128, buffer); | |
88 } | 80 } |
89 | 81 |
90 } // anonymous namespace | 82 } // anonymous namespace |
91 | 83 |
92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { | 84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri, |
85 bool is_uri) { | |
93 uri = String::Flatten(uri); | 86 uri = String::Flatten(uri); |
94 int uri_length = uri->length(); | 87 int uri_length = uri->length(); |
95 List<uint8_t> buffer(uri_length); | 88 List<uint8_t> buffer(uri_length); |
96 | 89 |
97 { | 90 { |
98 DisallowHeapAllocation no_gc; | 91 DisallowHeapAllocation no_gc; |
99 String::FlatContent uri_content = uri->GetFlatContent(); | 92 String::FlatContent uri_content = uri->GetFlatContent(); |
100 | 93 |
101 for (int k = 0; k < uri_length; k++) { | 94 for (int k = 0; k < uri_length; k++) { |
102 uc16 cc1 = uri_content.Get(k); | 95 uc16 cc1 = uri_content.Get(k); |
(...skipping 10 matching lines...) | |
113 if (IsUnescapePredicateInUriComponent(cc1) || | 106 if (IsUnescapePredicateInUriComponent(cc1) || |
114 (is_uri && IsUriSeparator(cc1))) { | 107 (is_uri && IsUriSeparator(cc1))) { |
115 buffer.Add(cc1); | 108 buffer.Add(cc1); |
116 } else { | 109 } else { |
117 EncodeSingle(cc1, &buffer); | 110 EncodeSingle(cc1, &buffer); |
118 } | 111 } |
119 continue; | 112 continue; |
120 } | 113 } |
121 | 114 |
122 AllowHeapAllocation allocate_error_and_return; | 115 AllowHeapAllocation allocate_error_and_return; |
123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); | 116 THROW_NEW_ERROR(isolate, NewURIError(), Object); |
124 } | 117 } |
125 } | 118 } |
126 | 119 |
127 Handle<String> result; | 120 Handle<String> result; |
128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 121 ASSIGN_RETURN_ON_EXCEPTION( |
129 isolate, result, | 122 isolate, result, |
130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); | 123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object); |
131 return *result; | 124 return result; |
125 } | |
126 | |
127 namespace { // anonymous namespace for DecodeURI helper functions | |
128 | |
129 bool IsReservedPredicate(uc16 c) { | |
130 switch (c) { | |
131 case '#': | |
132 case '$': | |
133 case '&': | |
134 case '+': | |
135 case ',': | |
136 case '/': | |
137 case ':': | |
138 case ';': | |
139 case '=': | |
140 case '?': | |
141 case '@': | |
142 return true; | |
143 default: | |
144 return false; | |
145 } | |
146 } | |
147 | |
148 bool IsRepalcementCharacter(List<uint8_t>* octets) { | |
Yang
2016/05/23 06:44:32
Typo: "IsRepalcementCharacter" should be "IsReplacementCharacter".
Franzi
2016/05/23 08:55:57
Done.
| |
149 // 0xFFFD is %ef%bf%bd | |
Yang
2016/05/23 06:44:32
What does this comment mean?
Franzi
2016/05/23 08:55:57
Reworded the comment to clarify why we check for t
| |
150 if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf || | |
151 octets->at(2) != 0xbd) { | |
152 return false; | |
153 } | |
154 return true; | |
155 } | |
156 | |
157 bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) { | |
158 size_t cursor = 0; | |
159 uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(), | |
160 octets->length(), &cursor); | |
161 // kBadChar is the Replacement Character, which is the decoding of | |
162 // valid input %ef%bf%bd | |
163 if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) { | |
164 return false; | |
165 } | |
166 | |
167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
168 two_byte_buffer->Add(value); | |
169 } else { | |
170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value)); | |
171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value)); | |
172 } | |
173 return true; | |
174 } | |
175 | |
176 bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) { | |
Yang
2016/05/23 06:44:31
Can we use uc16* as argument type? That way it's e
Yang
2016/05/23 06:44:32
can we call the second argument "index" or somethi
Franzi
2016/05/23 08:55:57
Done.
Franzi
2016/05/23 08:55:57
Done.
| |
177 char high = HexValue(uri_content->Get(k + 1)); | |
Yang
2016/05/23 06:44:32
FlatContent::Get returns a uc16. Casting that to s
Franzi
2016/05/23 08:55:57
Not sure I understand the comment. HexValue takes
Yang
2016/05/23 11:24:59
Ah, I see. I misunderstood. Never mind this comment.
| |
178 char low = HexValue(uri_content->Get(k + 2)); | |
179 if (high < 0 || low < 0) { | |
180 return false; | |
181 } | |
182 decoded = (high << 4) | low; | |
183 return true; | |
184 } | |
185 | |
186 template <typename T> | |
187 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k, | |
Yang
2016/05/23 06:44:32
Same here: please use "index" instead of "k".
Franzi
2016/05/23 08:55:57
Done.
| |
188 bool is_uri, List<T>* buffer) { | |
189 if (is_uri && IsReservedPredicate(decoded)) { | |
190 buffer->Add('%'); | |
191 buffer->Add(uri_content->Get(k + 1)); | |
192 buffer->Add(uri_content->Get(k + 2)); | |
Yang
2016/05/23 06:44:31
Can we have a safeguard here that we don't have im
Franzi
2016/05/23 08:55:57
Done. Throwing exception if uri_content->Get() is
| |
193 } else { | |
194 buffer->Add(decoded); | |
195 } | |
196 } | |
197 | |
198 bool IntoTwoByte(int index, bool is_uri, int uri_length, | |
199 String::FlatContent* uri_content, | |
200 List<uc16>* two_byte_buffer) { | |
201 for (int k = index; k < uri_length; k++) { | |
202 uc16 code = uri_content->Get(k); | |
203 if (code == '%') { | |
204 uc16 decoded; | |
205 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) { | |
206 return false; | |
207 } | |
208 k += 2; | |
209 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
210 int n = 0; | |
211 while (((decoded << ++n) & 0x80) != 0) { | |
Yang
2016/05/23 06:44:31
Can we have this as
do {
n++;
} while ((decode
Franzi
2016/05/23 08:55:57
Changed it to a simple while loop:
int n = 1;
| |
212 } | |
213 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) { | |
214 return false; | |
215 } | |
216 List<uint8_t> octets; | |
Yang
2016/05/23 06:44:31
octets will at most have the length 4, right? Can
Franzi
2016/05/23 08:55:57
Done.
| |
217 octets.Add(decoded); | |
218 | |
219 for (int i = 1; i < n; i++) { | |
220 uc16 decodedTrail; | |
221 | |
222 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length || | |
223 !TwoDigitHex(decodedTrail, k, uri_content)) { | |
224 return false; | |
225 } | |
226 k += 2; | |
227 octets.Add(decodedTrail); | |
228 } | |
229 | |
230 if (!DecodeOctets(&octets, two_byte_buffer)) { | |
231 return false; | |
232 } | |
233 } else { | |
234 AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer); | |
235 } | |
236 } else { | |
237 two_byte_buffer->Add(code); | |
238 } | |
239 } | |
240 return true; | |
241 } | |
242 | |
243 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, | |
244 List<uint8_t>* one_byte_buffer, | |
245 List<uc16>* two_byte_buffer) { | |
246 DisallowHeapAllocation no_gc; | |
247 String::FlatContent uri_content = uri->GetFlatContent(); | |
248 | |
249 int uri_length = uri->length(); | |
250 for (int k = 0; k < uri_length; k++) { | |
251 uc16 code = uri_content.Get(k); | |
252 if (code == '%') { | |
253 uc16 decoded; | |
254 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) { | |
255 return false; | |
256 } | |
257 | |
258 if (decoded > unibrow::Utf8::kMaxOneByteChar) { | |
259 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
260 two_byte_buffer); | |
261 } | |
262 | |
263 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); | |
264 k += 2; | |
265 } else { | |
266 if (code > unibrow::Utf8::kMaxOneByteChar) { | |
267 return IntoTwoByte(k, is_uri, uri_length, &uri_content, | |
268 two_byte_buffer); | |
269 } | |
270 one_byte_buffer->Add(code); | |
271 } | |
272 } | |
273 return true; | |
274 } | |
275 | |
276 } // anonymous namespace | |
277 | |
278 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri, | |
279 bool is_uri) { | |
280 uri = String::Flatten(uri); | |
281 List<uint8_t> one_byte_buffer; | |
282 List<uc16> two_byte_buffer; | |
283 | |
284 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { | |
285 THROW_NEW_ERROR(isolate, NewURIError(), Object); | |
286 } | |
287 | |
288 Handle<String> left = isolate->factory()->InternalizeOneByteString( | |
289 one_byte_buffer.ToConstVector()); | |
290 | |
291 Handle<String> right = isolate->factory()->InternalizeTwoByteString( | |
292 two_byte_buffer.ToConstVector()); | |
293 | |
294 Handle<String> result; | |
295 ASSIGN_RETURN_ON_EXCEPTION( | |
296 isolate, result, isolate->factory()->NewConsString(left, right), Object); | |
Yang
2016/05/23 06:44:32
Since we are going to copy from list into the heap
Franzi
2016/05/23 08:55:57
Returning sequential one- or two-byte string.
| |
297 | |
298 return result; | |
132 } | 299 } |
133 | 300 |
134 } // namespace internal | 301 } // namespace internal |
135 } // namespace v8 | 302 } // namespace v8 |
OLD | NEW |