Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Rebase Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/uri.h" 5 #include "src/uri.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/handles.h" 8 #include "src/handles.h"
9 #include "src/isolate-inl.h" 9 #include "src/isolate-inl.h"
10 #include "src/list.h" 10 #include "src/list.h"
11 11
12 namespace v8 { 12 namespace v8 {
13 namespace internal { 13 namespace internal {
14 14
15 namespace { // anonymous namespace for DecodeURI helper functions
16 bool IsReservedPredicate(uc16 c) {
17 switch (c) {
18 case '#':
19 case '$':
20 case '&':
21 case '+':
22 case ',':
23 case '/':
24 case ':':
25 case ';':
26 case '=':
27 case '?':
28 case '@':
29 return true;
30 default:
31 return false;
32 }
33 }
34
// Returns true only when |octets| is exactly the three-byte sequence
// 0xEF 0xBF 0xBD, i.e. the UTF-8 encoding of the replacement character
// U+FFFD. The length test comes first so that shorter inputs are never read
// past their end.
bool IsReplacementCharacter(const uint8_t* octets, int length) {
  return length == 3 && octets[0] == 0xef && octets[1] == 0xbf &&
         octets[2] == 0xbd;
}
44
45 bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) {
46 size_t cursor = 0;
47 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
48 if (value == unibrow::Utf8::kBadChar &&
49 !IsReplacementCharacter(octets, length)) {
50 return false;
51 }
52
53 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
54 buffer->Add(value);
55 } else {
56 buffer->Add(unibrow::Utf16::LeadSurrogate(value));
57 buffer->Add(unibrow::Utf16::TrailSurrogate(value));
58 }
59 return true;
60 }
61
62 bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) {
63 char high = HexValue(uri_content->Get(index + 1));
64 char low = HexValue(uri_content->Get(index + 2));
65 if (high < 0 || low < 0) {
66 return false;
67 }
68 *decoded = (high << 4) | low;
69 return true;
70 }
71
72 template <typename T>
73 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,
74 bool is_uri, List<T>* buffer) {
75 if (is_uri && IsReservedPredicate(decoded)) {
76 buffer->Add('%');
77 uc16 first = uri_content->Get(index + 1);
78 uc16 second = uri_content->Get(index + 2);
79 DCHECK_GT(std::numeric_limits<T>::max(), first);
80 DCHECK_GT(std::numeric_limits<T>::max(), second);
81
82 buffer->Add(first);
83 buffer->Add(second);
84 } else {
85 buffer->Add(decoded);
86 }
87 }
88
89 bool IntoTwoByte(int index, bool is_uri, int uri_length,
90 String::FlatContent* uri_content, List<uc16>* buffer) {
91 for (int k = index; k < uri_length; k++) {
92 uc16 code = uri_content->Get(k);
93 if (code == '%') {
94 uc16 decoded;
95 if (k + 2 >= uri_length || !TwoDigitHex(k, uri_content, &decoded)) {
96 return false;
97 }
98 k += 2;
99 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
100 uint8_t octets[unibrow::Utf8::kMaxEncodedSize];
101 octets[0] = decoded;
102
103 int number_of_continuation_bytes = 0;
104 while ((decoded << ++number_of_continuation_bytes) & 0x80) {
105 if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) {
106 return false;
107 }
108
109 uc16 continuation_byte;
110
111 if (uri_content->Get(++k) != '%' ||
112 !TwoDigitHex(k, uri_content, &continuation_byte)) {
113 return false;
114 }
115 k += 2;
116 octets[number_of_continuation_bytes] = continuation_byte;
117 }
118
119 if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {
120 return false;
121 }
122 } else {
123 AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);
124 }
125 } else {
126 buffer->Add(code);
127 }
128 }
129 return true;
130 }
131
132 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
133 List<uint8_t>* one_byte_buffer,
134 List<uc16>* two_byte_buffer) {
135 DisallowHeapAllocation no_gc;
136 String::FlatContent uri_content = uri->GetFlatContent();
137
138 int uri_length = uri->length();
139 for (int k = 0; k < uri_length; k++) {
140 uc16 code = uri_content.Get(k);
141 if (code == '%') {
142 uc16 decoded;
143 if (k + 2 >= uri_length || !TwoDigitHex(k, &uri_content, &decoded)) {
144 return false;
145 }
146
147 if (decoded > unibrow::Utf8::kMaxOneByteChar) {
148 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
149 two_byte_buffer);
150 }
151
152 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
153 k += 2;
154 } else {
155 if (code > unibrow::Utf8::kMaxOneByteChar) {
156 return IntoTwoByte(k, is_uri, uri_length, &uri_content,
157 two_byte_buffer);
158 }
159 one_byte_buffer->Add(code);
160 }
161 }
162 return true;
163 }
164
165 } // anonymous namespace
166
167 MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,
168 bool is_uri) {
169 uri = String::Flatten(uri);
170 List<uint8_t> one_byte_buffer;
171 List<uc16> two_byte_buffer;
172
173 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
174 THROW_NEW_ERROR(isolate, NewURIError(), String);
175 }
176
177 if (two_byte_buffer.is_empty()) {
178 return isolate->factory()->NewStringFromOneByte(
179 one_byte_buffer.ToConstVector());
180 }
181
182 Handle<SeqTwoByteString> result;
183 ASSIGN_RETURN_ON_EXCEPTION(
184 isolate, result, isolate->factory()->NewRawTwoByteString(
185 one_byte_buffer.length() + two_byte_buffer.length()),
186 String);
187
188 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),
189 one_byte_buffer.length());
190 CopyChars(result->GetChars() + one_byte_buffer.length(),
191 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length());
192
193 return result;
194 }
195
15 namespace { // anonymous namespace for EncodeURI helper functions 196 namespace { // anonymous namespace for EncodeURI helper functions
16 bool IsUnescapePredicateInUriComponent(uc16 c) { 197 bool IsUnescapePredicateInUriComponent(uc16 c) {
17 if (IsAlphaNumeric(c)) { 198 if (IsAlphaNumeric(c)) {
18 return true; 199 return true;
19 } 200 }
20 201
21 switch (c) { 202 switch (c) {
22 case '!': 203 case '!':
23 case '\'': 204 case '\'':
24 case '(': 205 case '(':
(...skipping 28 matching lines...) Expand all
53 } 234 }
54 } 235 }
55 236
56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { 237 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
57 buffer->Add('%'); 238 buffer->Add('%');
58 buffer->Add(HexCharOfValue(octet >> 4)); 239 buffer->Add(HexCharOfValue(octet >> 4));
59 buffer->Add(HexCharOfValue(octet & 0x0F)); 240 buffer->Add(HexCharOfValue(octet & 0x0F));
60 } 241 }
61 242
62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) { 243 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
63 uint8_t x = (c >> 12) & 0xF; 244 char s[4];
64 uint8_t y = (c >> 6) & 63; 245 int number_of_bytes;
65 uint8_t z = c & 63; 246 number_of_bytes =
66 if (c <= 0x007F) { 247 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
67 AddHexEncodedToBuffer(c, buffer); 248 for (int k = 0; k < number_of_bytes; k++) {
68 } else if (c <= 0x07FF) { 249 AddHexEncodedToBuffer(s[k], buffer);
69 AddHexEncodedToBuffer(y + 192, buffer);
70 AddHexEncodedToBuffer(z + 128, buffer);
71 } else {
72 AddHexEncodedToBuffer(x + 224, buffer);
73 AddHexEncodedToBuffer(y + 128, buffer);
74 AddHexEncodedToBuffer(z + 128, buffer);
75 } 250 }
76 } 251 }
77 252
78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { 253 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
79 uint8_t u = ((cc1 >> 6) & 0xF) + 1; 254 char s[4];
80 uint8_t w = (cc1 >> 2) & 0xF; 255 int number_of_bytes =
81 uint8_t x = cc1 & 3; 256 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
82 uint8_t y = (cc2 >> 6) & 0xF; 257 unibrow::Utf16::kNoPreviousCharacter, false);
83 uint8_t z = cc2 & 63; 258 for (int k = 0; k < number_of_bytes; k++) {
84 AddHexEncodedToBuffer((u >> 2) + 240, buffer); 259 AddHexEncodedToBuffer(s[k], buffer);
85 AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); 260 }
86 AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
87 AddHexEncodedToBuffer(z + 128, buffer);
88 } 261 }
89 262
90 } // anonymous namespace 263 } // anonymous namespace
91 264
92 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri, 265 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
93 bool is_uri) { 266 bool is_uri) {
94 uri = String::Flatten(uri); 267 uri = String::Flatten(uri);
95 int uri_length = uri->length(); 268 int uri_length = uri->length();
96 List<uint8_t> buffer(uri_length); 269 List<uint8_t> buffer(uri_length);
97 270
(...skipping 25 matching lines...) Expand all
123 AllowHeapAllocation allocate_error_and_return; 296 AllowHeapAllocation allocate_error_and_return;
124 THROW_NEW_ERROR(isolate, NewURIError(), String); 297 THROW_NEW_ERROR(isolate, NewURIError(), String);
125 } 298 }
126 } 299 }
127 300
128 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()); 301 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());
129 } 302 }
130 303
131 } // namespace internal 304 } // namespace internal
132 } // namespace v8 305 } // namespace v8
OLDNEW
« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698