Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(106)

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURI as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Address review comments Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/uri.h" 5 #include "src/uri.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/handles.h" 8 #include "src/handles.h"
9 #include "src/isolate-inl.h" 9 #include "src/isolate-inl.h"
10 #include "src/list.h" 10 #include "src/list.h"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 } 53 }
54 } 54 }
55 55
// Appends a three-character percent escape for octet to buffer: a percent
// sign, then HexCharOfValue() of the high nibble, then HexCharOfValue() of
// the low nibble. Both columns of the side-by-side diff are identical here.
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
57 buffer->Add('%'); 57 buffer->Add('%');
58 buffer->Add(HexCharOfValue(octet >> 4)); 58 buffer->Add(HexCharOfValue(octet >> 4));
59 buffer->Add(HexCharOfValue(octet & 0x0F)); 59 buffer->Add(HexCharOfValue(octet & 0x0F));
60 } 60 }
61 61
// Percent-encodes the single UTF-16 code unit c into buffer, emitting one
// escape per encoded byte through AddHexEncodedToBuffer().
// Left (old) column: splits c into the bit fields x, y and z and assembles a
// one-, two- or three-byte sequence by hand, choosing the length by comparing
// c against 0x007F and 0x07FF.
// Right (new) column: asks unibrow::Utf8::Encode() to write the bytes into the
// local array s and escapes each of the number_of_bytes bytes it reports.
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
63 uint8_t x = (c >> 12) & 0xF; 63 char s[4];
64 uint8_t y = (c >> 6) & 63; 64 int number_of_bytes;
65 uint8_t z = c & 63; 65 number_of_bytes =
66 if (c <= 0x007F) { 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
67 AddHexEncodedToBuffer(c, buffer); 67 for (int k = 0; k < number_of_bytes; k++) {
68 } else if (c <= 0x07FF) { 68 AddHexEncodedToBuffer(s[k], buffer);
69 AddHexEncodedToBuffer(y + 192, buffer);
70 AddHexEncodedToBuffer(z + 128, buffer);
71 } else {
72 AddHexEncodedToBuffer(x + 224, buffer);
73 AddHexEncodedToBuffer(y + 128, buffer);
74 AddHexEncodedToBuffer(z + 128, buffer);
75 } 69 }
76 } 70 }
77 71
// Percent-encodes the pair of code units (cc1, cc2) into buffer.
// Left (old) column: derives four bytes by hand from bit fields of cc1 and
// cc2 (u, w, x, y, z) and emits four escapes.
// Right (new) column: merges the pair with
// unibrow::Utf16::CombineSurrogatePair(), encodes the result with
// unibrow::Utf8::Encode() and escapes each byte that call reports.
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; 73 char s[4];
80 uint8_t w = (cc1 >> 2) & 0xF; 74 int number_of_bytes =
81 uint8_t x = cc1 & 3; 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
82 uint8_t y = (cc2 >> 6) & 0xF; 76 unibrow::Utf16::kNoPreviousCharacter, false);
83 uint8_t z = cc2 & 63; 77 for (int k = 0; k < number_of_bytes; k++) {
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); 78 AddHexEncodedToBuffer(s[k], buffer);
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); 79 }
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
87 AddHexEncodedToBuffer(z + 128, buffer);
88 } 80 }
89 81
90 } // anonymous namespace 82 } // anonymous namespace
91 83
92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { 84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri,
85 bool is_uri) {
93 uri = String::Flatten(uri); 86 uri = String::Flatten(uri);
94 int uri_length = uri->length(); 87 int uri_length = uri->length();
95 List<uint8_t> buffer(uri_length); 88 List<uint8_t> buffer(uri_length);
96 89
97 { 90 {
98 DisallowHeapAllocation no_gc; 91 DisallowHeapAllocation no_gc;
99 String::FlatContent uri_content = uri->GetFlatContent(); 92 String::FlatContent uri_content = uri->GetFlatContent();
100 93
101 for (int k = 0; k < uri_length; k++) { 94 for (int k = 0; k < uri_length; k++) {
102 uc16 cc1 = uri_content.Get(k); 95 uc16 cc1 = uri_content.Get(k);
(...skipping 10 matching lines...) Expand all
113 if (IsUnescapePredicateInUriComponent(cc1) || 106 if (IsUnescapePredicateInUriComponent(cc1) ||
114 (is_uri && IsUriSeparator(cc1))) { 107 (is_uri && IsUriSeparator(cc1))) {
115 buffer.Add(cc1); 108 buffer.Add(cc1);
116 } else { 109 } else {
117 EncodeSingle(cc1, &buffer); 110 EncodeSingle(cc1, &buffer);
118 } 111 }
119 continue; 112 continue;
120 } 113 }
121 114
122 AllowHeapAllocation allocate_error_and_return; 115 AllowHeapAllocation allocate_error_and_return;
123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); 116 THROW_NEW_ERROR(isolate, NewURIError(), Object);
124 } 117 }
125 } 118 }
126 119
127 Handle<String> result; 120 Handle<String> result;
128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 121 ASSIGN_RETURN_ON_EXCEPTION(
129 isolate, result, 122 isolate, result,
130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); 123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object);
131 return *result; 124 return result;
125 }
126
127 namespace { // anonymous namespace for DecodeURI helper functions
128
// Returns true when c is one of the eleven characters listed in the switch
// below (# $ & + , / : ; = ? @) and false for every other value.
// AddToBuffer() consults this to decide whether a decoded escape has to be
// kept in its original escaped form when is_uri is set.
129 bool IsReservedPredicate(uc16 c) {
130 switch (c) {
131 case '#':
132 case '$':
133 case '&':
134 case '+':
135 case ',':
136 case '/':
137 case ':':
138 case ';':
139 case '=':
140 case '?':
141 case '@':
142 return true;
143 default:
144 return false;
145 }
146 }
147
// Returns true when octets holds exactly the three bytes 0xEF 0xBF 0xBD,
// the byte sequence that decodes to U+FFFD. Any other length or content
// yields false. (Name corrected from the misspelled IsRepalcementCharacter;
// the only caller is DecodeOctets() directly below.)
148 bool IsReplacementCharacter(List<uint8_t>* octets) {
Yang 2016/05/23 06:44:32 typo.
Franzi 2016/05/23 08:55:57 Done.
149 // 0xFFFD is %ef%bf%bd
Yang 2016/05/23 06:44:32 What does this comment mean?
Franzi 2016/05/23 08:55:57 Reworded the comment to clarify why we check for t
150 if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf ||
151 octets->at(2) != 0xbd) {
152 return false;
153 }
154 return true;
155 }
156
// Decodes the byte sequence in octets with unibrow::Utf8::ValueOf() and
// appends the result to two_byte_buffer: a single code unit when the value
// is at most kMaxNonSurrogateCharCode, otherwise a lead and a trail surrogate.
// Returns false, appending nothing, when ValueOf() reports kBadChar for input
// that is not literally the encoded replacement character; returns true
// otherwise.
157 bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) {
158 size_t cursor = 0;
159 uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(),
160 octets->length(), &cursor);
161 // kBadChar is the Replacement Character, which is the decoding of
162 // valid input %ef%bf%bd
// So kBadChar alone does not prove an error: it is only a failure when the
// input bytes were something other than that valid encoding.
163 if (value == unibrow::Utf8::kBadChar && !IsReplacementCharacter(octets)) {
164 return false;
165 }
166
167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
168 two_byte_buffer->Add(value);
169 } else {
170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));
171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));
172 }
173 return true;
174 }
175
// Interprets the two characters at positions k + 1 and k + 2 of uri_content
// as hexadecimal digits and stores (high << 4) | low in decoded.
// Returns false, leaving decoded unmodified, when HexValue() yields a
// negative result for either character; returns true otherwise.
// No bounds check is done here: both callers in this file verify that
// k + 2 is below the string length before calling.
176 bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) {
Yang 2016/05/23 06:44:31 Can we use uc16* as argument type? That way it's e
Yang 2016/05/23 06:44:32 can we call the second argument "index" or somethi
Franzi 2016/05/23 08:55:57 Done.
Franzi 2016/05/23 08:55:57 Done.
// Keep the HexValue() results in int rather than plain char. The signedness
// of plain char is implementation-defined; where it is unsigned a negative
// result would wrap to a positive value and the check below could never
// reject a non-hex character.
177 int high = HexValue(uri_content->Get(k + 1));
Yang 2016/05/23 06:44:32 FlatContent::Get returns a uc16. Casting that to s
Franzi 2016/05/23 08:55:57 Not sure I understand the comment. HexValue takes
Yang 2016/05/23 11:24:59 Ah I see. I misunderstood. Nevermind this comment.
178 int low = HexValue(uri_content->Get(k + 2));
179 if (high < 0 || low < 0) {
180 return false;
181 }
182 decoded = (high << 4) | low;
183 return true;
184 }
185
// Appends the outcome of one decoded escape to buffer.
// When is_uri is set and decoded is a character accepted by
// IsReservedPredicate(), the escape is not decoded: a percent sign and the two
// original characters at k + 1 and k + 2 of uri_content are copied instead.
// In every other case the decoded value itself is appended.
// k is the position of the percent sign that started the escape.
186 template <typename T>
187 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k,
Yang 2016/05/23 06:44:32 same here, "index" instead of "k".
Franzi 2016/05/23 08:55:57 Done.
188 bool is_uri, List<T>* buffer) {
189 if (is_uri && IsReservedPredicate(decoded)) {
190 buffer->Add('%');
// Get() is narrowed to T here. Both call sites in this file reach this
// function only after TwoDigitHex() has accepted these two characters.
191 buffer->Add(uri_content->Get(k + 1));
192 buffer->Add(uri_content->Get(k + 2));
Yang 2016/05/23 06:44:31 Can we have a safeguard here that we don't have im
Franzi 2016/05/23 08:55:57 Done. Throwing exception if uri_content->Get() is
193 } else {
194 buffer->Add(decoded);
195 }
196 }
197
// Decodes the characters of uri_content from position index up to uri_length
// into two_byte_buffer.
// Characters other than a percent sign are copied unchanged. An escape is
// decoded with TwoDigitHex(); a value not above kMaxOneByteChar goes through
// AddToBuffer(), a larger value is treated as the first byte of a multi-byte
// sequence whose remaining bytes must follow as further escapes and which is
// then handed to DecodeOctets().
// Returns false as soon as an escape is truncated, is not valid hex, has an
// unacceptable sequence length, or DecodeOctets() rejects the bytes; returns
// true once the whole range has been processed.
198 bool IntoTwoByte(int index, bool is_uri, int uri_length,
199 String::FlatContent* uri_content,
200 List<uc16>* two_byte_buffer) {
201 for (int k = index; k < uri_length; k++) {
202 uc16 code = uri_content->Get(k);
203 if (code == '%') {
204 uc16 decoded;
// An escape needs two more characters after the percent sign.
205 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) {
206 return false;
207 }
// k now rests on the last character of the escape just decoded.
208 k += 2;
209 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
210 int n = 0;
// Increment n until bit (7 - n) of decoded is clear. n is then used as the
// total number of bytes in the sequence, this first byte included.
211 while (((decoded << ++n) & 0x80) != 0) {
Yang 2016/05/23 06:44:31 Can we have this as do { n++; } while ((decode
Franzi 2016/05/23 08:55:57 Changed it to a simple while loop: int n = 1;
212 }
// Reject a length of 1, a length above 4, and input too short to hold the
// n - 1 further three-character escapes.
213 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) {
214 return false;
215 }
216 List<uint8_t> octets;
Yang 2016/05/23 06:44:31 octets will at most have the length 4, right? Can
Franzi 2016/05/23 08:55:57 Done.
217 octets.Add(decoded);
218
219 for (int i = 1; i < n; i++) {
220 uc16 decodedTrail;
221
// ++k steps onto the character that must be the next percent sign.
222 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length ||
223 !TwoDigitHex(decodedTrail, k, uri_content)) {
224 return false;
225 }
226 k += 2;
227 octets.Add(decodedTrail);
228 }
229
230 if (!DecodeOctets(&octets, two_byte_buffer)) {
231 return false;
232 }
233 } else {
// k was already advanced past the escape, so k - 2 is the position of its
// percent sign, which is what AddToBuffer() expects.
234 AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer);
235 }
236 } else {
237 two_byte_buffer->Add(code);
238 }
239 }
240 return true;
241 }
242
// Decodes the flat content of uri into the two output buffers.
// Output is collected in one_byte_buffer for as long as every character and
// every decoded escape value is at most kMaxOneByteChar. At the first larger
// value the remainder of the input, starting at the current position k, is
// delegated to IntoTwoByte(), which fills two_byte_buffer, and its result is
// returned directly.
// Returns false when an escape is truncated or not valid hex (or when
// IntoTwoByte() fails); returns true otherwise.
// Heap allocation is disallowed for the duration of the call via no_gc.
243 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
244 List<uint8_t>* one_byte_buffer,
245 List<uc16>* two_byte_buffer) {
246 DisallowHeapAllocation no_gc;
247 String::FlatContent uri_content = uri->GetFlatContent();
248
249 int uri_length = uri->length();
250 for (int k = 0; k < uri_length; k++) {
251 uc16 code = uri_content.Get(k);
252 if (code == '%') {
253 uc16 decoded;
// An escape needs two more characters after the percent sign.
254 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) {
255 return false;
256 }
257
// k still points at the percent sign, so IntoTwoByte() decodes this escape
// again as the start of its range.
258 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
259 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
260 two_byte_buffer);
261 }
262
// AddToBuffer() reads positions k + 1 and k + 2, so k is advanced only
// after the call.
263 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
264 k += 2;
265 } else {
266 if (code > unibrow::Utf8::kMaxOneByteChar) {
267 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
268 two_byte_buffer);
269 }
270 one_byte_buffer->Add(code);
271 }
272 }
273 return true;
274 }
275
276 } // anonymous namespace
277
// Decodes the percent escapes in uri and returns the resulting string.
// The input is flattened, then IntoOneAndTwoByte() splits the decoded output
// into a one-byte prefix and a two-byte remainder. If that helper reports
// failure a URIError is thrown through THROW_NEW_ERROR. Otherwise both buffers
// are turned into internalized strings and joined with NewConsString().
// is_uri is forwarded unchanged to the helper.
278 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri,
279 bool is_uri) {
280 uri = String::Flatten(uri);
281 List<uint8_t> one_byte_buffer;
282 List<uc16> two_byte_buffer;
283
284 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
285 THROW_NEW_ERROR(isolate, NewURIError(), Object);
286 }
287
// NOTE(review): both halves are internalized before being joined; the
// reviewer remark below questions building the result this way.
288 Handle<String> left = isolate->factory()->InternalizeOneByteString(
289 one_byte_buffer.ToConstVector());
290
291 Handle<String> right = isolate->factory()->InternalizeTwoByteString(
292 two_byte_buffer.ToConstVector());
293
294 Handle<String> result;
295 ASSIGN_RETURN_ON_EXCEPTION(
296 isolate, result, isolate->factory()->NewConsString(left, right), Object);
Yang 2016/05/23 06:44:32 Since we are going to copy from list into the heap
Franzi 2016/05/23 08:55:57 Returning sequential one- or two-byte string.
297
298 return result;
132 } 299 }
133 300
134 } // namespace internal 301 } // namespace internal
135 } // namespace v8 302 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698