OLD | NEW |
| (Empty) |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This file contains support for URI manipulations written in | |
6 // JavaScript. | |
7 | |
8 (function(global, utils) { | |
9 | |
10 "use strict"; | |
11 | |
12 %CheckIsBootstrapping(); | |
13 | |
// -------------------------------------------------------------------
15 // Imports | |
16 | |
17 var GlobalObject = global.Object; | |
18 var GlobalArray = global.Array; | |
19 var InternalArray = utils.InternalArray; | |
20 | |
21 // ------------------------------------------------------------------- | |
22 // Define internal helper functions. | |
23 | |
// Converts the char code of a hexadecimal digit into that digit's numeric
// value (0..15). Every other char code yields -1.
function HexValueOf(code) {
  var value = -1;
  if (48 <= code && code <= 57) {
    // '0'..'9' map to 0..9.
    value = code - 48;
  } else if (65 <= code && code <= 70) {
    // 'A'..'F' map to 10..15.
    value = code - 55;
  } else if (97 <= code && code <= 102) {
    // 'a'..'f' map to 10..15.
    value = code - 87;
  }
  return value;
}
34 | |
// Returns true when |cc| is the char code of an ASCII letter or an ASCII
// decimal digit, and false otherwise.
function isAlphaNumeric(cc) {
  return (97 <= cc && cc <= 122) ||  // a - z
         (65 <= cc && cc <= 90) ||   // A - Z
         (48 <= cc && cc <= 57);     // 0 - 9
}
46 | |
// Table mapping a nibble value (0..15) to the char code of its upper-case
// hex digit. Holds 0 until URIEncodeOctets builds the table on first use.
var hexCharCodeArray = 0;

// Stores the three char codes of the escape sequence "%XY" for |octet| into
// |result|, beginning at |index|. Returns the index just past the last char
// code written.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highDigit = hexCharCodeArray[octet >> 4];
  var lowDigit = hexCharCodeArray[octet & 0x0F];
  result[index++] = 37;  // Char code of '%'.
  result[index++] = highDigit;
  result[index++] = lowDigit;
  return index;
}
56 | |
// Writes the "%XY" escape of the octets in |octets| into |result|, starting
// at |index|, and returns the index following the last char code written.
// The first octet is always emitted; the octets at positions 1 to 3 are
// emitted only when they are truthy.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // First use: build the nibble -> hex digit char code table.
    hexCharCodeArray = [
      48, 49, 50, 51, 52, 53, 54, 55, 56, 57,  // '0'..'9'
      65, 66, 67, 68, 69, 70                   // 'A'..'F'
    ];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i <= 3; i++) {
    var octet = octets[i];
    if (octet) index = URIAddEncodedOctetToBuffer(octet, result, index);
  }
  return index;
}
68 | |
// Encodes the single code unit |cc| as one, two or three UTF-8 octets and
// appends their "%XY" escapes to |result| starting at |index|. Returns the
// index following the last char code written.
function URIEncodeSingle(cc, result, index) {
  var x = (cc >> 12) & 0xF;
  var y = (cc >> 6) & 63;
  var z = cc & 63;
  // Use InternalArray (as Encode does for its buffer) instead of the
  // script-visible global Array. For one- and two-octet encodings the
  // trailing slots stay holes and URIEncodeOctets reads them; with a global
  // Array those reads and the stores below go through Array.prototype, so
  // indexed properties or accessors installed there by user code could alter
  // the encoded output.
  var octets = new InternalArray(3);
  if (cc <= 0x007F) {
    // 0xxxxxxx
    octets[0] = cc;
  } else if (cc <= 0x07FF) {
    // 110yyyyy 10zzzzzz
    octets[0] = y + 192;
    octets[1] = z + 128;
  } else {
    // 1110xxxx 10yyyyyy 10zzzzzz
    octets[0] = x + 224;
    octets[1] = y + 128;
    octets[2] = z + 128;
  }
  return URIEncodeOctets(octets, result, index);
}
86 | |
// Encodes the surrogate pair |cc1| (high surrogate) and |cc2| (low
// surrogate) as four UTF-8 octets and appends their "%XY" escapes to
// |result| starting at |index|. Returns the index following the last char
// code written.
function URIEncodePair(cc1 , cc2, result, index) {
  // Bit fields of the pair; u is the plane part of the code point, offset
  // by one as the surrogate encoding requires.
  var u = ((cc1 >> 6) & 0xF) + 1;
  var w = (cc1 >> 2) & 0xF;
  var x = cc1 & 3;
  var y = (cc2 >> 6) & 0xF;
  var z = cc2 & 63;
  // Use InternalArray (as Encode does for its buffer) instead of the
  // script-visible global Array: the stores below fill holes, and on a
  // global Array such stores consult Array.prototype, where user code could
  // have installed indexed setters that intercept them.
  var octets = new InternalArray(4);
  octets[0] = (u >> 2) + 240;
  octets[1] = (((u & 3) << 4) | w) + 128;
  octets[2] = ((x << 4) | y) + 128;
  octets[3] = z + 128;
  return URIEncodeOctets(octets, result, index);
}
100 | |
// Combines the char codes of two hex digits into the octet they denote:
// |highChar| supplies the upper four bits and |lowChar| the lower four.
// Throws the error built by MakeURIError() when either one is not a hex
// digit.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highNibble = HexValueOf(highChar);
  var lowNibble = HexValueOf(lowChar);
  var bothValid = highNibble != -1 && lowNibble != -1;
  if (!bothValid) throw MakeURIError();
  return (highNibble << 4) | lowNibble;
}
107 | |
108 // Callers must ensure that |result| is a sufficiently long sequential | |
109 // two-byte string! | |
110 function URIDecodeOctets(octets, result, index) { | |
111 var value; | |
112 var o0 = octets[0]; | |
113 if (o0 < 0x80) { | |
114 value = o0; | |
115 } else if (o0 < 0xc2) { | |
116 throw MakeURIError(); | |
117 } else { | |
118 var o1 = octets[1]; | |
119 if (o0 < 0xe0) { | |
120 var a = o0 & 0x1f; | |
121 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); | |
122 var b = o1 & 0x3f; | |
123 value = (a << 6) + b; | |
124 if (value < 0x80 || value > 0x7ff) throw MakeURIError(); | |
125 } else { | |
126 var o2 = octets[2]; | |
127 if (o0 < 0xf0) { | |
128 var a = o0 & 0x0f; | |
129 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); | |
130 var b = o1 & 0x3f; | |
131 if ((o2 < 0x80) || (o2 > 0xbf)) throw MakeURIError(); | |
132 var c = o2 & 0x3f; | |
133 value = (a << 12) + (b << 6) + c; | |
134 if ((value < 0x800) || (value > 0xffff)) throw MakeURIError(); | |
135 } else { | |
136 var o3 = octets[3]; | |
137 if (o0 < 0xf8) { | |
138 var a = (o0 & 0x07); | |
139 if ((o1 < 0x80) || (o1 > 0xbf)) throw MakeURIError(); | |
140 var b = (o1 & 0x3f); | |
141 if ((o2 < 0x80) || (o2 > 0xbf)) { | |
142 throw MakeURIError(); | |
143 } | |
144 var c = (o2 & 0x3f); | |
145 if ((o3 < 0x80) || (o3 > 0xbf)) throw MakeURIError(); | |
146 var d = (o3 & 0x3f); | |
147 value = (a << 18) + (b << 12) + (c << 6) + d; | |
148 if ((value < 0x10000) || (value > 0x10ffff)) throw MakeURIError(); | |
149 } else { | |
150 throw MakeURIError(); | |
151 } | |
152 } | |
153 } | |
154 } | |
155 if (0xD800 <= value && value <= 0xDFFF) throw MakeURIError(); | |
156 if (value < 0x10000) { | |
157 %_TwoByteSeqStringSetChar(index++, value, result); | |
158 } else { | |
159 %_TwoByteSeqStringSetChar(index++, (value >> 10) + 0xd7c0, result); | |
160 %_TwoByteSeqStringSetChar(index++, (value & 0x3ff) + 0xdc00, result); | |
161 } | |
162 return index; | |
163 } | |
164 | |
165 // ECMA-262, section 15.1.3 | |
166 function Encode(uri, unescape) { | |
167 uri = TO_STRING(uri); | |
168 var uriLength = uri.length; | |
169 var array = new InternalArray(uriLength); | |
170 var index = 0; | |
171 for (var k = 0; k < uriLength; k++) { | |
172 var cc1 = %_StringCharCodeAt(uri, k); | |
173 if (unescape(cc1)) { | |
174 array[index++] = cc1; | |
175 } else { | |
176 if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw MakeURIError(); | |
177 if (cc1 < 0xD800 || cc1 > 0xDBFF) { | |
178 index = URIEncodeSingle(cc1, array, index); | |
179 } else { | |
180 k++; | |
181 if (k == uriLength) throw MakeURIError(); | |
182 var cc2 = %_StringCharCodeAt(uri, k); | |
183 if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw MakeURIError(); | |
184 index = URIEncodePair(cc1, cc2, array, index); | |
185 } | |
186 } | |
187 } | |
188 | |
189 var result = %NewString(array.length, NEW_ONE_BYTE_STRING); | |
190 for (var i = 0; i < array.length; i++) { | |
191 %_OneByteSeqStringSetChar(i, array[i], result); | |
192 } | |
193 return result; | |
194 } | |
195 | |
196 // ECMA-262, section 15.1.3 | |
197 function Decode(uri, reserved) { | |
198 uri = TO_STRING(uri); | |
199 var uriLength = uri.length; | |
200 var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING); | |
201 var index = 0; | |
202 var k = 0; | |
203 | |
204 // Optimistically assume one-byte string. | |
205 for ( ; k < uriLength; k++) { | |
206 var code = %_StringCharCodeAt(uri, k); | |
207 if (code == 37) { // '%' | |
208 if (k + 2 >= uriLength) throw MakeURIError(); | |
209 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, k+1), | |
210 %_StringCharCodeAt(uri, k+2)); | |
211 if (cc >> 7) break; // Assumption wrong, two-byte string. | |
212 if (reserved(cc)) { | |
213 %_OneByteSeqStringSetChar(index++, 37, one_byte); // '%'. | |
214 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+1), | |
215 one_byte); | |
216 %_OneByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k+2), | |
217 one_byte); | |
218 } else { | |
219 %_OneByteSeqStringSetChar(index++, cc, one_byte); | |
220 } | |
221 k += 2; | |
222 } else { | |
223 if (code > 0x7f) break; // Assumption wrong, two-byte string. | |
224 %_OneByteSeqStringSetChar(index++, code, one_byte); | |
225 } | |
226 } | |
227 | |
228 one_byte = %TruncateString(one_byte, index); | |
229 if (k == uriLength) return one_byte; | |
230 | |
231 // Write into two byte string. | |
232 var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING); | |
233 index = 0; | |
234 | |
235 for ( ; k < uriLength; k++) { | |
236 var code = %_StringCharCodeAt(uri, k); | |
237 if (code == 37) { // '%' | |
238 if (k + 2 >= uriLength) throw MakeURIError(); | |
239 var cc = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k), | |
240 %_StringCharCodeAt(uri, ++k)); | |
241 if (cc >> 7) { | |
242 var n = 0; | |
243 while (((cc << ++n) & 0x80) != 0) { } | |
244 if (n == 1 || n > 4) throw MakeURIError(); | |
245 var octets = new GlobalArray(n); | |
246 octets[0] = cc; | |
247 if (k + 3 * (n - 1) >= uriLength) throw MakeURIError(); | |
248 for (var i = 1; i < n; i++) { | |
249 if (uri[++k] != '%') throw MakeURIError(); | |
250 octets[i] = URIHexCharsToCharCode(%_StringCharCodeAt(uri, ++k), | |
251 %_StringCharCodeAt(uri, ++k)); | |
252 } | |
253 index = URIDecodeOctets(octets, two_byte, index); | |
254 } else if (reserved(cc)) { | |
255 %_TwoByteSeqStringSetChar(index++, 37, two_byte); // '%'. | |
256 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k - 1), | |
257 two_byte); | |
258 %_TwoByteSeqStringSetChar(index++, %_StringCharCodeAt(uri, k), | |
259 two_byte); | |
260 } else { | |
261 %_TwoByteSeqStringSetChar(index++, cc, two_byte); | |
262 } | |
263 } else { | |
264 %_TwoByteSeqStringSetChar(index++, code, two_byte); | |
265 } | |
266 } | |
267 | |
268 two_byte = %TruncateString(two_byte, index); | |
269 return one_byte + two_byte; | |
270 } | |
271 | |
272 // ------------------------------------------------------------------- | |
273 // Define exported functions. | |
274 | |
275 // ECMA-262 - B.2.1. | |
276 function URIEscapeJS(s) { | |
277 return %URIEscape(s); | |
278 } | |
279 | |
280 // ECMA-262 - B.2.2. | |
281 function URIUnescapeJS(s) { | |
282 return %URIUnescape(s); | |
283 } | |
284 | |
// ECMA-262 - 15.1.3.1.
// Implementation of the global decodeURI(): runs Decode on |uri| with a
// predicate that marks the char codes of  # $ & + , / : ; = ? @  as
// reserved, so escapes of those characters are left in place.
function URIDecode(uri) {
  var isReserved = function(cc) {
    return (35 <= cc && cc <= 36) ||  // #$
           cc == 38 ||                // &
           (43 <= cc && cc <= 44) ||  // +,
           cc == 47 ||                // /
           (58 <= cc && cc <= 59) ||  // :;
           cc == 61 ||                // =
           (63 <= cc && cc <= 64);    // ?@
  };
  return Decode(uri, isReserved);
}
307 | |
// ECMA-262 - 15.1.3.2.
// Implementation of the global decodeURIComponent(): runs Decode on
// |component| with a predicate that treats no char code as reserved, so
// every escape is decoded.
function URIDecodeComponent(component) {
  var nothingReserved = function(cc) {
    return false;
  };
  return Decode(component, nothingReserved);
}
313 | |
// ECMA-262 - 15.1.3.3.
// Implementation of the global encodeURI(): runs Encode on |uri| with a
// predicate selecting the char codes that are copied through unescaped,
// namely ASCII letters and digits plus  ! # $ & ' ( ) * + , - . / : ; = ? @
// _ ~
function URIEncode(uri) {
  var isUnescaped = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                // !
           (35 <= cc && cc <= 36) ||  // #$
           (38 <= cc && cc <= 47) ||  // &'()*+,-./
           (58 <= cc && cc <= 59) ||  // :;
           cc == 61 ||                // =
           (63 <= cc && cc <= 64) ||  // ?@
           cc == 95 ||                // _
           cc == 126;                 // ~
  };
  return Encode(uri, isUnescaped);
}
339 | |
// ECMA-262 - 15.1.3.4
// Implementation of the global encodeURIComponent(): runs Encode on
// |component| with a predicate selecting the char codes that are copied
// through unescaped, namely ASCII letters and digits plus  ! ' ( ) * - . _ ~
function URIEncodeComponent(component) {
  var isUnescaped = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                // !
           (39 <= cc && cc <= 42) ||  // '()*
           (45 <= cc && cc <= 46) ||  // -.
           cc == 95 ||                // _
           cc == 126;                 // ~
  };
  return Encode(component, isUnescaped);
}
359 | |
360 // ------------------------------------------------------------------- | |
361 // Install exported functions. | |
362 | |
// Name/function pairs for the URI functions exposed on the global object.
var uriFunctions = [
  "escape", URIEscapeJS,
  "unescape", URIUnescapeJS,
  "decodeURI", URIDecode,
  "decodeURIComponent", URIDecodeComponent,
  "encodeURI", URIEncode,
  "encodeURIComponent", URIEncodeComponent
];

// Set up non-enumerable URI functions on the global object and set
// their names.
utils.InstallFunctions(global, DONT_ENUM, uriFunctions);
373 | |
374 }) | |
OLD | NEW |