Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Use Utf8::Encode() and ValueOf() Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/uri.h" 5 #include "src/uri.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/handles.h" 8 #include "src/handles.h"
9 #include "src/isolate-inl.h" 9 #include "src/isolate-inl.h"
10 #include "src/list.h" 10 #include "src/list.h"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 } 53 }
54 } 54 }
55 55
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { 56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
57 buffer->Add('%'); 57 buffer->Add('%');
58 buffer->Add(HexCharOfValue(octet >> 4)); 58 buffer->Add(HexCharOfValue(octet >> 4));
59 buffer->Add(HexCharOfValue(octet & 0x0F)); 59 buffer->Add(HexCharOfValue(octet & 0x0F));
60 } 60 }
61 61
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { 62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
63 uint8_t x = (c >> 12) & 0xF; 63 char s[4];
64 uint8_t y = (c >> 6) & 63; 64 int number_of_bytes;
65 uint8_t z = c & 63; 65 number_of_bytes =
66 if (c <= 0x007F) { 66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
67 AddHexEncodedToBuffer(c, buffer); 67 for (int k = 0; k < number_of_bytes; k++) {
68 } else if (c <= 0x07FF) { 68 AddHexEncodedToBuffer(s[k], buffer);
69 AddHexEncodedToBuffer(y + 192, buffer);
70 AddHexEncodedToBuffer(z + 128, buffer);
71 } else {
72 AddHexEncodedToBuffer(x + 224, buffer);
73 AddHexEncodedToBuffer(y + 128, buffer);
74 AddHexEncodedToBuffer(z + 128, buffer);
75 } 69 }
76 } 70 }
77 71
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { 72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; 73 char s[4];
80 uint8_t w = (cc1 >> 2) & 0xF; 74 int number_of_bytes =
81 uint8_t x = cc1 & 3; 75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
82 uint8_t y = (cc2 >> 6) & 0xF; 76 unibrow::Utf16::kNoPreviousCharacter, false);
83 uint8_t z = cc2 & 63; 77 for (int k = 0; k < number_of_bytes; k++) {
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); 78 AddHexEncodedToBuffer(s[k], buffer);
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); 79 }
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
87 AddHexEncodedToBuffer(z + 128, buffer);
88 } 80 }
89 81
90 } // anonymous namespace 82 } // anonymous namespace
91 83
92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { 84 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {
93 uri = String::Flatten(uri); 85 uri = String::Flatten(uri);
94 int uri_length = uri->length(); 86 int uri_length = uri->length();
95 List<uint8_t> buffer(uri_length); 87 List<uint8_t> buffer(uri_length);
96 88
97 { 89 {
(...skipping 26 matching lines...) Expand all
124 } 116 }
125 } 117 }
126 118
127 Handle<String> result; 119 Handle<String> result;
128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 120 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
129 isolate, result, 121 isolate, result,
130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); 122 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()));
131 return *result; 123 return *result;
132 } 124 }
133 125
126 namespace { // anonymous namespace for DecodeURI helper functions
127
// Returns true when |c| is one of the eleven characters enumerated in the
// switch below:  # $ & + , / : ; = ? @
// AddToBuffer() consults this predicate so that, when is_uri is set, an escape
// that decodes to one of these characters is kept in its escaped form.
128 bool IsReservedPredicate(uc16 c) {
129 switch (c) {
130 case '#':
131 case '$':
132 case '&':
133 case '+':
134 case ',':
135 case '/':
136 case ':':
137 case ';':
138 case '=':
139 case '?':
140 case '@':
141 return true;
142 default:
143 return false;
144 }
145 }
146
// Returns true only when |octets| holds exactly the three bytes
// 0xEF 0xBF 0xBD; any other length or content yields false.
// NOTE(review): "Repalcement" is a misspelling of "Replacement". The name is
// left untouched here because DecodeOctets() calls it by this spelling.
147 bool IsRepalcementCharacter(List<uint8_t>* octets) {
148 // U+FFFD (the replacement character) is written %ef%bf%bd when escaped.
149 if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf ||
150 octets->at(2) != 0xbd) {
151 return false;
152 }
153 return true;
154 }
155
// Decodes the bytes collected in |octets| with unibrow::Utf8::ValueOf() and
// appends the resulting character to |two_byte_buffer|.
// Returns false when ValueOf() reports kBadChar for input that is not itself
// the escaped replacement character; returns true otherwise.
156 bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) {
// |cursor| is handed to ValueOf() by address; its value is not read afterwards.
157 size_t cursor = 0;
158 uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(),
159 octets->length(), &cursor);
160 // kBadChar is the Replacement Character, which is also the legitimate
161 // decoding of the valid input %ef%bf%bd, so that one input must be let through.
162 if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) {
163 return false;
164 }
165
// A value up to kMaxNonSurrogateCharCode is stored as a single code unit;
// anything larger is stored as a lead surrogate followed by a trail surrogate.
166 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
167 two_byte_buffer->Add(value);
168 } else {
169 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));
170 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));
171 }
172 return true;
173 }
174
// Reads the two characters at positions k + 1 and k + 2 of |uri_content| as
// hexadecimal digits. On success stores (high << 4) | low in |decoded| and
// returns true. Returns false, leaving |decoded| unwritten, when either
// character is rejected by HexValue().
// The caller must guarantee that k + 2 is a valid index; both call sites check
// k + 2 < uri_length before calling.
//
// The digit values are kept in int rather than plain char. Whether plain char
// is signed is implementation-defined, and with an unsigned char a negative
// result could never satisfy the `< 0` tests below, so malformed escapes
// would be accepted on such targets.
// NOTE(review): this relies on HexValue() signalling "not a hex digit" with a
// negative value, as the `< 0` checks imply - confirm against its declaration.
175 bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) {
176 int high = HexValue(uri_content->Get(k + 1));
177 int low = HexValue(uri_content->Get(k + 2));
178 if (high < 0 || low < 0) {
179 return false;
180 }
181 decoded = (high << 4) | low;
182 return true;
183 }
184
// Appends the result of one decoded escape to |buffer|.
// |k| is the index of the escape's '%' in |uri_content| (IntoTwoByte() passes
// k - 2 because it has already advanced past the two hex digits).
// When |is_uri| is set and |decoded| satisfies IsReservedPredicate(), the
// escape is copied through as '%' plus the two original characters at k + 1
// and k + 2; otherwise the decoded value itself is appended.
185 template <typename T>
186 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k,
187 bool is_uri, List<T>* buffer) {
188 if (is_uri && IsReservedPredicate(decoded)) {
189 buffer->Add('%');
190 buffer->Add(uri_content->Get(k + 1));
191 buffer->Add(uri_content->Get(k + 2));
192 } else {
193 buffer->Add(decoded);
194 }
195 }
196
// Decodes |uri_content| from position |index| up to |uri_length| and appends
// the output to |two_byte_buffer|. Characters other than '%' are copied
// unchanged; each '%' must start a well-formed escape.
// Returns false as soon as a malformed or truncated escape, or an escape
// sequence rejected by DecodeOctets(), is met; returns true once the whole
// range has been consumed.
197 bool IntoTwoByte(int index, bool is_uri, int uri_length,
198 String::FlatContent* uri_content,
199 List<uc16>* two_byte_buffer) {
200 for (int k = index; k < uri_length; k++) {
201 uc16 code = uri_content->Get(k);
202 if (code == '%') {
203 uc16 decoded;
// Two more characters must exist after the '%' and both must be hex digits.
204 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) {
205 return false;
206 }
// k now sits on the second hex digit of this escape.
207 k += 2;
208 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
// Find the first n >= 1 for which bit (7 - n) of |decoded| is clear.
// NOTE(review): presumably kMaxOneByteChar is 0x7F, so bit 7 is set here
// and n is the count of leading 1 bits, i.e. the length in bytes that the
// lead byte announces - confirm.
209 int n = 0;
210 while (((decoded << ++n) & 0x80) != 0) {
211 }
// Reject n == 1 and n > 4, and require room for the remaining (n - 1)
// escapes of three characters each.
212 if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) {
213 return false;
214 }
215 List<uint8_t> octets;
216 octets.Add(decoded);
217
// Collect the remaining n - 1 bytes; each one must itself be a %XX escape.
218 for (int i = 1; i < n; i++) {
219 uc16 decodedTrail;
220
// ++k moves onto the expected '%'; the later k += 2 moves onto its last digit.
221 if (uri_content->Get(++k) != '%' || k + 2 >= uri_length ||
222 !TwoDigitHex(decodedTrail, k, uri_content)) {
223 return false;
224 }
225 k += 2;
226 octets.Add(decodedTrail);
227 }
228
229 if (!DecodeOctets(&octets, two_byte_buffer)) {
230 return false;
231 }
232 } else {
// Single-byte escape: k - 2 is the index of its '%'.
233 AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer);
234 }
235 } else {
236 two_byte_buffer->Add(code);
237 }
238 }
239 return true;
240 }
241
// Decodes |uri| in two phases. Output is appended to |one_byte_buffer| for as
// long as every character and every decoded escape is at most
// unibrow::Utf8::kMaxOneByteChar. At the first character or escape above that
// limit, the rest of the input (starting at that position) is handed to
// IntoTwoByte(), which fills |two_byte_buffer|, and its result is returned.
// Returns false on a malformed or truncated escape.
242 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
243 List<uint8_t>* one_byte_buffer,
244 List<uc16>* two_byte_buffer) {
// NOTE(review): presumably the flat content below refers to the string's
// storage, which is why allocation is disallowed in this scope - confirm.
245 DisallowHeapAllocation no_gc;
246 String::FlatContent uri_content = uri->GetFlatContent();
247
248 int uri_length = uri->length();
249 for (int k = 0; k < uri_length; k++) {
250 uc16 code = uri_content.Get(k);
251 if (code == '%') {
252 uc16 decoded;
253 if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) {
254 return false;
255 }
256
// k still indexes the '%', so IntoTwoByte() re-reads this escape from its start.
257 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
258 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
259 two_byte_buffer);
260 }
261
262 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
// Skip the two hex digits; the loop's k++ then moves past the escape.
263 k += 2;
264 } else {
265 if (code > unibrow::Utf8::kMaxOneByteChar) {
266 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
267 two_byte_buffer);
268 }
269 one_byte_buffer->Add(code);
270 }
271 }
272 return true;
273 }
274
275 } // anonymous namespace
276
// Decodes the percent-escapes in |uri| and returns the resulting string.
// |is_uri| is forwarded to the helpers, where it makes escapes of reserved
// characters (see IsReservedPredicate()) stay escaped.
// Throws a URIError and returns the failure value when IntoOneAndTwoByte()
// reports malformed input.
277 Object* Uri::Decode(Isolate* isolate, Handle<String> uri, bool is_uri) {
278 uri = String::Flatten(uri);
279 List<uint8_t> one_byte_buffer;
280 List<uc16> two_byte_buffer;
281
282 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
283 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());
284 }
285
// The one-byte buffer holds the leading part of the output and the two-byte
// buffer the remainder; each becomes its own string.
286 Handle<String> left = isolate->factory()->InternalizeOneByteString(
287 one_byte_buffer.ToConstVector());
288
289 Handle<String> right = isolate->factory()->InternalizeTwoByteString(
290 two_byte_buffer.ToConstVector());
291
// Join the two parts; NewConsString() may fail, in which case the failure is
// propagated by the macro.
292 Handle<String> result;
293 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
294 isolate, result, isolate->factory()->NewConsString(left, right));
295
296 return *result;
297 }
298
134 } // namespace internal 299 } // namespace internal
135 } // namespace v8 300 } // namespace v8
OLDNEW
« src/uri.h ('K') | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698