Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Move new test 612109.js to regress/ Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/uri.h" 5 #include "src/uri.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/handles.h" 8 #include "src/handles.h"
9 #include "src/isolate-inl.h" 9 #include "src/isolate-inl.h"
10 #include "src/list.h" 10 #include "src/list.h"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 } 53 }
54 } 54 }
55 55
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
57 buffer->Add('%'); 57 buffer->Add('%');
58 buffer->Add(HexCharOfValue(octet >> 4)); 58 buffer->Add(HexCharOfValue(octet >> 4));
59 buffer->Add(HexCharOfValue(octet & 0x0F)); 59 buffer->Add(HexCharOfValue(octet & 0x0F));
60 } 60 }
61 61
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
63 uint8_t x = (c >> 12) & 0xF; 63 char s[4];
64 uint8_t y = (c >> 6) & 63; 64 int number_of_bytes;
65 uint8_t z = c & 63; 65 number_of_bytes =
66 if (c <= 0x007F) { 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
67 AddHexEncodedToBuffer(c, buffer); 67 for (int k = 0; k < number_of_bytes; k++) {
68 } else if (c <= 0x07FF) { 68 AddHexEncodedToBuffer(s[k], buffer);
69 AddHexEncodedToBuffer(y + 192, buffer);
70 AddHexEncodedToBuffer(z + 128, buffer);
71 } else {
72 AddHexEncodedToBuffer(x + 224, buffer);
73 AddHexEncodedToBuffer(y + 128, buffer);
74 AddHexEncodedToBuffer(z + 128, buffer);
75 } 69 }
76 } 70 }
77 71
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; 73 char s[4];
80 uint8_t w = (cc1 >> 2) & 0xF; 74 int number_of_bytes =
81 uint8_t x = cc1 & 3; 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
82 uint8_t y = (cc2 >> 6) & 0xF; 76 unibrow::Utf16::kNoPreviousCharacter, false);
83 uint8_t z = cc2 & 63; 77 for (int k = 0; k < number_of_bytes; k++) {
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); 78 AddHexEncodedToBuffer(s[k], buffer);
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); 79 }
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
87 AddHexEncodedToBuffer(z + 128, buffer);
88 } 80 }
89 81
90 } // anonymous namespace 82 } // anonymous namespace
91 83
92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { 84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri,
85 bool is_uri) {
93 uri = String::Flatten(uri); 86 uri = String::Flatten(uri);
94 int uri_length = uri->length(); 87 int uri_length = uri->length();
95 List<uint8_t> buffer(uri_length); 88 List<uint8_t> buffer(uri_length);
96 89
97 { 90 {
98 DisallowHeapAllocation no_gc; 91 DisallowHeapAllocation no_gc;
99 String::FlatContent uri_content = uri->GetFlatContent(); 92 String::FlatContent uri_content = uri->GetFlatContent();
100 93
101 for (int k = 0; k < uri_length; k++) { 94 for (int k = 0; k < uri_length; k++) {
102 uc16 cc1 = uri_content.Get(k); 95 uc16 cc1 = uri_content.Get(k);
(...skipping 10 matching lines...) Expand all
113 if (IsUnescapePredicateInUriComponent(cc1) || 106 if (IsUnescapePredicateInUriComponent(cc1) ||
114 (is_uri && IsUriSeparator(cc1))) { 107 (is_uri && IsUriSeparator(cc1))) {
115 buffer.Add(cc1); 108 buffer.Add(cc1);
116 } else { 109 } else {
117 EncodeSingle(cc1, &buffer); 110 EncodeSingle(cc1, &buffer);
118 } 111 }
119 continue; 112 continue;
120 } 113 }
121 114
122 AllowHeapAllocation allocate_error_and_return; 115 AllowHeapAllocation allocate_error_and_return;
123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); 116 THROW_NEW_ERROR(isolate, NewURIError(), Object);
124 } 117 }
125 } 118 }
126 119
127 Handle<String> result; 120 Handle<String> result;
128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 121 ASSIGN_RETURN_ON_EXCEPTION(
129 isolate, result, 122 isolate, result,
130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); 123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object);
131 return *result; 124 return result;
125 }
126
127 namespace { // anonymous namespace for DecodeURI helper functions
128
129 bool IsReservedPredicate(uc16 c) {
130 switch (c) {
131 case '#':
132 case '$':
133 case '&':
134 case '+':
135 case ',':
136 case '/':
137 case ':':
138 case ';':
139 case '=':
140 case '?':
141 case '@':
142 return true;
143 default:
144 return false;
145 }
146 }
147
// Returns true when |octets| holds exactly the three-byte UTF-8 encoding of
// the replacement character U+FFFD (0xEF 0xBF 0xBD).
// |octets| is only read when |length| is 3.
bool IsReplacementCharacter(const uint8_t* octets, int length) {
  return length == 3 && octets[0] == 0xef && octets[1] == 0xbf &&
         octets[2] == 0xbd;
}
157
158 bool DecodeOctets(const uint8_t* octets, int length,
159 List<uc16>* two_byte_buffer) {
160 size_t cursor = 0;
161 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
162 if (value == unibrow::Utf8::kBadChar &&
163 !IsReplacementCharacter(octets, length)) {
164 return false;
165 }
166
167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
168 two_byte_buffer->Add(value);
169 } else {
170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));
171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));
172 }
173 return true;
174 }
175
176 bool TwoDigitHex(uc16* decoded, int index, String::FlatContent* uri_content) {
177 char high = HexValue(uri_content->Get(index + 1));
178 char low = HexValue(uri_content->Get(index + 2));
179 if (high < 0 || low < 0) {
180 return false;
181 }
182 *decoded = (high << 4) | low;
183 return true;
184 }
185
186 template <typename T>
187 bool AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,
188 bool is_uri, List<T>* buffer) {
189 if (is_uri && IsReservedPredicate(decoded)) {
190 buffer->Add('%');
191 uc16 first = uri_content->Get(index + 1);
192 uc16 second = uri_content->Get(index + 2);
193 if (first > std::numeric_limits<T>::max() ||
Yang 2016/05/23 11:24:59 Do you expect this to happen in some cases? Imo th
Franzi 2016/05/24 15:07:38 You're right. Can never happen. Done.
194 second > std::numeric_limits<T>::max()) {
195 return false;
196 }
197 buffer->Add(first);
198 buffer->Add(second);
199 } else {
200 buffer->Add(decoded);
201 }
202 return true;
203 }
204
205 bool IntoTwoByte(int index, bool is_uri, int uri_length,
206 String::FlatContent* uri_content,
207 List<uc16>* two_byte_buffer) {
208 for (int k = index; k < uri_length; k++) {
209 uc16 code = uri_content->Get(k);
210 if (code == '%') {
211 uc16 decoded;
212 if (k + 2 >= uri_length || !TwoDigitHex(&decoded, k, uri_content)) {
213 return false;
214 }
215 k += 2;
216 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
217 int n = 1;
218 while ((decoded << n) & 0x80) {
Yang 2016/05/23 11:24:59 Actually, you could merge this while loop with the
Franzi 2016/05/24 15:07:38 Merged. Also renamed n to number_of_continuation_b
219 n++;
220 }
221 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) {
222 return false;
223 }
224 uint8_t octets[4];
225 int octet_length = 0;
226 octets[octet_length++] = decoded;
227
228 for (int i = 1; i < n; i++) {
229 uc16 decodedTrail;
230
231 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length ||
232 !TwoDigitHex(&decodedTrail, k, uri_content)) {
233 return false;
234 }
235 k += 2;
236 octets[octet_length++] = decodedTrail;
237 }
238
239 if (!DecodeOctets(octets, octet_length, two_byte_buffer)) {
240 return false;
241 }
242 } else {
243 if (!AddToBuffer(decoded, uri_content, k - 2, is_uri,
244 two_byte_buffer)) {
245 return false;
246 }
247 }
248 } else {
249 two_byte_buffer->Add(code);
250 }
251 }
252 return true;
253 }
254
255 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
256 List<uint8_t>* one_byte_buffer,
257 List<uc16>* two_byte_buffer) {
258 DisallowHeapAllocation no_gc;
259 String::FlatContent uri_content = uri->GetFlatContent();
260
261 int uri_length = uri->length();
262 for (int k = 0; k < uri_length; k++) {
263 uc16 code = uri_content.Get(k);
264 if (code == '%') {
265 uc16 decoded;
266 if (k + 2 >= uri_length || !TwoDigitHex(&decoded, k, &uri_content)) {
267 return false;
268 }
269
270 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
271 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
272 two_byte_buffer);
273 }
274
275 if (!AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer)) {
276 return false;
277 }
278 k += 2;
279 } else {
280 if (code > unibrow::Utf8::kMaxOneByteChar) {
281 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
282 two_byte_buffer);
283 }
284 one_byte_buffer->Add(code);
285 }
286 }
287 return true;
288 }
289
290 } // anonymous namespace
291
292 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri,
293 bool is_uri) {
294 uri = String::Flatten(uri);
295 List<uint8_t> one_byte_buffer;
296 List<uc16> two_byte_buffer;
297
298 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
299 THROW_NEW_ERROR(isolate, NewURIError(), Object);
300 }
301
302 if (two_byte_buffer.is_empty()) {
303 Handle<SeqOneByteString> result;
304
305 ASSIGN_RETURN_ON_EXCEPTION(
306 isolate, result,
307 isolate->factory()->NewRawOneByteString(one_byte_buffer.length()),
Yang 2016/05/23 11:24:59 You can use NewStringFromOneByte here.
Franzi 2016/05/24 15:07:38 Done.
308 Object);
309 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),
310 one_byte_buffer.length());
311 return result;
312 }
313
314 Handle<SeqTwoByteString> result;
315 ASSIGN_RETURN_ON_EXCEPTION(
316 isolate, result, isolate->factory()->NewRawTwoByteString(
317 one_byte_buffer.length() + two_byte_buffer.length()),
318 Object);
319
320 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),
321 one_byte_buffer.length());
322 CopyChars(result->GetChars() + one_byte_buffer.length(),
323 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length());
324
325 return result;
132 } 326 }
133 327
134 } // namespace internal 328 } // namespace internal
135 } // namespace v8 329 } // namespace v8
OLDNEW
« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698