OLD | NEW |
| (Empty) |
1 // Copyright 2013 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef V8_URI_H_ | |
6 #define V8_URI_H_ | |
7 | |
8 #include "src/v8.h" | |
9 | |
10 #include "src/conversions.h" | |
11 #include "src/string-search.h" | |
12 #include "src/utils.h" | |
13 | |
14 namespace v8 { | |
15 namespace internal { | |
16 | |
17 | |
18 template <typename Char> | |
19 static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); | |
20 | |
21 | |
22 template <> | |
23 Vector<const uint8_t> GetCharVector(Handle<String> string) { | |
24 String::FlatContent flat = string->GetFlatContent(); | |
25 DCHECK(flat.IsOneByte()); | |
26 return flat.ToOneByteVector(); | |
27 } | |
28 | |
29 | |
30 template <> | |
31 Vector<const uc16> GetCharVector(Handle<String> string) { | |
32 String::FlatContent flat = string->GetFlatContent(); | |
33 DCHECK(flat.IsTwoByte()); | |
34 return flat.ToUC16Vector(); | |
35 } | |
36 | |
37 | |
38 class URIUnescape : public AllStatic { | |
39 public: | |
40 template<typename Char> | |
41 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate, | |
42 Handle<String> source); | |
43 | |
44 private: | |
45 static const signed char kHexValue['g']; | |
46 | |
47 template<typename Char> | |
48 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow( | |
49 Isolate* isolate, Handle<String> string, int start_index); | |
50 | |
51 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); | |
52 | |
53 template <typename Char> | |
54 static INLINE(int UnescapeChar(Vector<const Char> vector, | |
55 int i, | |
56 int length, | |
57 int* step)); | |
58 }; | |
59 | |
60 | |
61 const signed char URIUnescape::kHexValue[] = { | |
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
63 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
64 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
65 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, | |
66 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
68 -1, 10, 11, 12, 13, 14, 15 }; | |
69 | |
70 | |
71 template<typename Char> | |
72 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate, | |
73 Handle<String> source) { | |
74 int index; | |
75 { DisallowHeapAllocation no_allocation; | |
76 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%")); | |
77 index = search.Search(GetCharVector<Char>(source), 0); | |
78 if (index < 0) return source; | |
79 } | |
80 return UnescapeSlow<Char>(isolate, source, index); | |
81 } | |
82 | |
83 | |
84 template <typename Char> | |
85 MaybeHandle<String> URIUnescape::UnescapeSlow( | |
86 Isolate* isolate, Handle<String> string, int start_index) { | |
87 bool one_byte = true; | |
88 int length = string->length(); | |
89 | |
90 int unescaped_length = 0; | |
91 { DisallowHeapAllocation no_allocation; | |
92 Vector<const Char> vector = GetCharVector<Char>(string); | |
93 for (int i = start_index; i < length; unescaped_length++) { | |
94 int step; | |
95 if (UnescapeChar(vector, i, length, &step) > | |
96 String::kMaxOneByteCharCode) { | |
97 one_byte = false; | |
98 } | |
99 i += step; | |
100 } | |
101 } | |
102 | |
103 DCHECK(start_index < length); | |
104 Handle<String> first_part = | |
105 isolate->factory()->NewProperSubString(string, 0, start_index); | |
106 | |
107 int dest_position = 0; | |
108 Handle<String> second_part; | |
109 DCHECK(unescaped_length <= String::kMaxLength); | |
110 if (one_byte) { | |
111 Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString( | |
112 unescaped_length).ToHandleChecked(); | |
113 DisallowHeapAllocation no_allocation; | |
114 Vector<const Char> vector = GetCharVector<Char>(string); | |
115 for (int i = start_index; i < length; dest_position++) { | |
116 int step; | |
117 dest->SeqOneByteStringSet(dest_position, | |
118 UnescapeChar(vector, i, length, &step)); | |
119 i += step; | |
120 } | |
121 second_part = dest; | |
122 } else { | |
123 Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString( | |
124 unescaped_length).ToHandleChecked(); | |
125 DisallowHeapAllocation no_allocation; | |
126 Vector<const Char> vector = GetCharVector<Char>(string); | |
127 for (int i = start_index; i < length; dest_position++) { | |
128 int step; | |
129 dest->SeqTwoByteStringSet(dest_position, | |
130 UnescapeChar(vector, i, length, &step)); | |
131 i += step; | |
132 } | |
133 second_part = dest; | |
134 } | |
135 return isolate->factory()->NewConsString(first_part, second_part); | |
136 } | |
137 | |
138 | |
139 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { | |
140 if (character1 > 'f') return -1; | |
141 int hi = kHexValue[character1]; | |
142 if (hi == -1) return -1; | |
143 if (character2 > 'f') return -1; | |
144 int lo = kHexValue[character2]; | |
145 if (lo == -1) return -1; | |
146 return (hi << 4) + lo; | |
147 } | |
148 | |
149 | |
150 template <typename Char> | |
151 int URIUnescape::UnescapeChar(Vector<const Char> vector, | |
152 int i, | |
153 int length, | |
154 int* step) { | |
155 uint16_t character = vector[i]; | |
156 int32_t hi = 0; | |
157 int32_t lo = 0; | |
158 if (character == '%' && | |
159 i <= length - 6 && | |
160 vector[i + 1] == 'u' && | |
161 (hi = TwoDigitHex(vector[i + 2], | |
162 vector[i + 3])) != -1 && | |
163 (lo = TwoDigitHex(vector[i + 4], | |
164 vector[i + 5])) != -1) { | |
165 *step = 6; | |
166 return (hi << 8) + lo; | |
167 } else if (character == '%' && | |
168 i <= length - 3 && | |
169 (lo = TwoDigitHex(vector[i + 1], | |
170 vector[i + 2])) != -1) { | |
171 *step = 3; | |
172 return lo; | |
173 } else { | |
174 *step = 1; | |
175 return character; | |
176 } | |
177 } | |
178 | |
179 | |
180 class URIEscape : public AllStatic { | |
181 public: | |
182 template<typename Char> | |
183 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate, | |
184 Handle<String> string); | |
185 | |
186 private: | |
187 static const char kHexChars[17]; | |
188 static const char kNotEscaped[256]; | |
189 | |
190 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } | |
191 }; | |
192 | |
193 | |
194 const char URIEscape::kHexChars[] = "0123456789ABCDEF"; | |
195 | |
196 | |
197 // kNotEscaped is generated by the following: | |
198 // | |
199 // #!/bin/perl | |
200 // for (my $i = 0; $i < 256; $i++) { | |
201 // print "\n" if $i % 16 == 0; | |
202 // my $c = chr($i); | |
203 // my $escaped = 1; | |
204 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; | |
205 // print $escaped ? "0, " : "1, "; | |
206 // } | |
207 | |
208 const char URIEscape::kNotEscaped[] = { | |
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, | |
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
213 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, | |
215 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | |
225 | |
226 | |
227 template<typename Char> | |
228 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { | |
229 DCHECK(string->IsFlat()); | |
230 int escaped_length = 0; | |
231 int length = string->length(); | |
232 | |
233 { DisallowHeapAllocation no_allocation; | |
234 Vector<const Char> vector = GetCharVector<Char>(string); | |
235 for (int i = 0; i < length; i++) { | |
236 uint16_t c = vector[i]; | |
237 if (c >= 256) { | |
238 escaped_length += 6; | |
239 } else if (IsNotEscaped(c)) { | |
240 escaped_length++; | |
241 } else { | |
242 escaped_length += 3; | |
243 } | |
244 | |
245 // We don't allow strings that are longer than a maximal length. | |
246 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow. | |
247 if (escaped_length > String::kMaxLength) break; // Provoke exception. | |
248 } | |
249 } | |
250 | |
251 // No length change implies no change. Return original string if no change. | |
252 if (escaped_length == length) return string; | |
253 | |
254 Handle<SeqOneByteString> dest; | |
255 ASSIGN_RETURN_ON_EXCEPTION( | |
256 isolate, dest, | |
257 isolate->factory()->NewRawOneByteString(escaped_length), | |
258 String); | |
259 int dest_position = 0; | |
260 | |
261 { DisallowHeapAllocation no_allocation; | |
262 Vector<const Char> vector = GetCharVector<Char>(string); | |
263 for (int i = 0; i < length; i++) { | |
264 uint16_t c = vector[i]; | |
265 if (c >= 256) { | |
266 dest->SeqOneByteStringSet(dest_position, '%'); | |
267 dest->SeqOneByteStringSet(dest_position+1, 'u'); | |
268 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]); | |
269 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]); | |
270 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]); | |
271 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]); | |
272 dest_position += 6; | |
273 } else if (IsNotEscaped(c)) { | |
274 dest->SeqOneByteStringSet(dest_position, c); | |
275 dest_position++; | |
276 } else { | |
277 dest->SeqOneByteStringSet(dest_position, '%'); | |
278 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]); | |
279 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]); | |
280 dest_position += 3; | |
281 } | |
282 } | |
283 } | |
284 | |
285 return dest; | |
286 } | |
287 | |
288 } } // namespace v8::internal | |
289 | |
290 #endif // V8_URI_H_ | |
OLD | NEW |