Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(233)

Side by Side Diff: third_party/js/punycode.js

Issue 848513004: Mojo JS Bindings: URL class (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*! http://mths.be/punycode v1.2.3 by @mathias */
abarth-chromium 2015/01/13 20:32:35 Is there a license for this code?
2 define(function() {
3
/**
 * The `punycode` object. Declared here and assigned further down, once the
 * public API has been assembled.
 * @name punycode
 * @type Object
 */
var punycode,

	/** Highest positive signed 32-bit integer value */
	maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

	/** Bootstring parameters (the Punycode values; see RFC 3492, section 5) */
	base = 36,
	tMin = 1,
	tMax = 26,
	skew = 38,
	damp = 700,
	initialBias = 72,
	initialN = 128, // 0x80
	delimiter = '-', // '\x2D'

	/** Regular expressions */
	regexPunycode = /^xn--/,
	regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
	regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

	/** Error messages, keyed by the type passed to `error()` */
	errors = {
		'overflow': 'Overflow: input needs wider integers to process',
		'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
		'invalid-input': 'Invalid input'
	},

	/** Convenience shortcuts */
	baseMinusTMin = base - tMin,
	floor = Math.floor,
	stringFromCharCode = String.fromCharCode;
43
44 /*---------------------------------------------------------------------- ----*/
45
/**
 * A generic error utility function. It never returns: it always throws.
 * @private
 * @param {String} type The error type; looked up as a key in `errors`.
 * @throws {RangeError} Always, carrying the message stored for `type`.
 */
function error(type) {
	throw new RangeError(errors[type]);
}
55
/**
 * A generic `Array#map` utility function.
 * The input array is left untouched; results are collected in a new array.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function that gets called for every array
 * item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, fn) {
	var length = array.length,
	    result = [];
	// Iterate from the last item to the first so that callbacks run (and
	// throw) in the same order as they always have.
	while (length--) {
		result[length] = fn(array[length]);
	}
	return result;
}
71
/**
 * A simple `Array#map`-like wrapper to work with domain name strings.
 * The string is split into labels on every separator matched by
 * `regexSeparators`, each label is passed through `fn`, and the results are
 * joined back together with '.'.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} A new string built from the labels returned by the
 * callback function.
 */
function mapDomain(string, fn) {
	var labels = string.split(regexSeparators);
	return map(labels, fn).join('.');
}
84
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	var output = [],
	    counter = 0,
	    length = string.length,
	    value,
	    extra;
	while (counter < length) {
		value = string.charCodeAt(counter++);
		if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
			// high surrogate, and there is a next character
			extra = string.charCodeAt(counter++);
			if ((extra & 0xFC00) === 0xDC00) { // low surrogate
				output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
			} else {
				// unmatched surrogate; only append this code unit, in case the next
				// code unit is the high surrogate of a surrogate pair
				output.push(value);
				counter--;
			}
		} else {
			// BMP code unit, a lone low surrogate, or a high surrogate at the
			// very end of the string: emitted unchanged
			output.push(value);
		}
	}
	return output;
}
123
/**
 * Creates a string based on an array of numeric code points.
 * The array passed in is only read, never modified.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
	var output = '',
	    length = array.length,
	    index,
	    value;
	for (index = 0; index < length; ++index) {
		value = array[index];
		if (value > 0xFFFF) {
			// Astral code point: emit the high surrogate now and leave the
			// low surrogate in `value` for the shared append below.
			value -= 0x10000;
			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
			value = 0xDC00 | value & 0x3FF;
		}
		output += stringFromCharCode(value);
	}
	return output;
}
144
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
	// Both ends of every range are checked: testing only the upper end lets
	// code points below the range slip through with a bogus or negative digit.
	if (codePoint >= 48 && codePoint <= 57) { // '0'..'9' map to 26..35
		return codePoint - 22;
	}
	if (codePoint >= 65 && codePoint <= 90) { // 'A'..'Z' map to 0..25
		return codePoint - 65;
	}
	if (codePoint >= 97 && codePoint <= 122) { // 'a'..'z' map to 0..25
		return codePoint - 97;
	}
	return base;
}
166
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point.
 * @param {Number} flag Zero selects the lowercase form; any other value
 * selects the uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`, which needs to be in the range
 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
 * used; else, the lowercase form is used. The behavior is undefined
 * if `flag` is non-zero and `digit` has no uppercase form.
 */
function digitToBasic(digit, flag) {
	//  0..25 map to ASCII a..z or A..Z
	// 26..35 map to ASCII 0..9
	// The comparisons yield booleans that are coerced to 0/1: the first adds
	// 75 to reach the letters, the second subtracts 32 to switch to uppercase.
	return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
}
183
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints The divisor used to scale `delta`; callers pass
 * the number of code points handled so far, including the current one.
 * @param {Boolean} firstTime Truthy for the first adaptation, in which case
 * `delta` is divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
	var k = 0;
	delta = firstTime ? floor(delta / damp) : delta >> 1;
	delta += floor(delta / numPoints);
	// Parentheses spell out the original precedence: `*`, then `>>`, then `>`.
	for (/* no initialization */; delta > ((baseMinusTMin * tMax) >> 1); k += base) {
		delta = floor(delta / baseMinusTMin);
	}
	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}
198
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} With the 'not-basic' message if a code point >= 0x80
 * precedes the last delimiter, 'invalid-input' if the input ends in the
 * middle of an integer or contains a symbol that is not a Punycode digit,
 * and 'overflow' if a value would exceed `maxInt`.
 */
function decode(input) {
	// Don't use UCS-2
	var output = [],
	    inputLength = input.length,
	    out,
	    i = 0,
	    n = initialN,
	    bias = initialBias,
	    basic,
	    j,
	    index,
	    oldi,
	    w,
	    k,
	    digit,
	    t,
	    /** Cached calculation results */
	    baseMinusT;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (j = 0; j < basic; ++j) {
		// if it's not a basic code point
		if (input.charCodeAt(j) >= 0x80) {
			error('not-basic');
		}
		output.push(input.charCodeAt(j));
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.

	for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			digit = basicToDigit(input.charCodeAt(index++));

			// A symbol that is not a Punycode digit is malformed input, not
			// an arithmetic overflow, so it gets its own error.
			if (digit >= base) {
				error('invalid-input');
			}

			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output
		output.splice(i++, 0, n);

	}

	return ucs2encode(output);
}
299
/**
 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
 * symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} With the 'overflow' message if `delta` would exceed
 * `maxInt`.
 */
function encode(input) {
	var n,
	    delta,
	    handledCPCount,
	    basicLength,
	    bias,
	    j,
	    m,
	    q,
	    k,
	    t,
	    currentValue,
	    output = [],
	    /** `inputLength` will hold the number of code points in `input`. */
	    inputLength,
	    /** Cached calculation results */
	    handledCPCountPlusOne,
	    baseMinusT,
	    qMinusT;

	// Convert the input in UCS-2 to Unicode
	input = ucs2decode(input);

	// Cache the length
	inputLength = input.length;

	// Initialize the state
	n = initialN;
	delta = 0;
	bias = initialBias;

	// Handle the basic code points
	for (j = 0; j < inputLength; ++j) {
		currentValue = input[j];
		if (currentValue < 0x80) {
			output.push(stringFromCharCode(currentValue));
		}
	}

	handledCPCount = basicLength = output.length;

	// `handledCPCount` is the number of code points that have been handled;
	// `basicLength` is the number of basic code points.

	// Finish the basic string - if it is not empty - with a delimiter
	if (basicLength) {
		output.push(delimiter);
	}

	// Main encoding loop:
	while (handledCPCount < inputLength) {

		// All non-basic code points < n have been handled already. Find the next
		// larger one:
		for (m = maxInt, j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue >= n && currentValue < m) {
				m = currentValue;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow
		handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];

			if (currentValue < n && ++delta > maxInt) {
				error('overflow');
			}

			if (currentValue === n) {
				// Represent delta as a generalized variable-length integer
				for (q = delta, k = base; /* no condition */; k += base) {
					t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
					if (q < t) {
						break;
					}
					qMinusT = q - t;
					baseMinusT = base - t;
					output.push(
						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
					);
					q = floor(qMinusT / baseMinusT);
				}

				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
				delta = 0;
				++handledCPCount;
			}
		}

		++delta;
		++n;

	}
	return output.join('');
}
413
/**
 * Converts a Punycode string representing a domain name to Unicode. Only the
 * Punycoded parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it on a string that has already been converted to
 * Unicode.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
	return mapDomain(domain, function(string) {
		// Labels matching `regexPunycode` lose their 4-character prefix and
		// are lowercased before decoding; all other labels pass through.
		return regexPunycode.test(string)
			? decode(string.slice(4).toLowerCase())
			: string;
	});
}
431
/**
 * Converts a Unicode string representing a domain name to Punycode. Only the
 * non-ASCII parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it with a domain that's already in ASCII.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
	return mapDomain(domain, function(string) {
		// Only labels matching `regexNonASCII` are encoded and given the
		// 'xn--' prefix; all other labels pass through unchanged.
		return regexNonASCII.test(string)
			? 'xn--' + encode(string)
			: string;
	});
}
447
448 /*---------------------------------------------------------------------- ----*/
449
/** Define the public API */
punycode = {
	/**
	 * A string representing the current Punycode.js version number.
	 * @memberOf punycode
	 * @type String
	 */
	'version': '1.2.3',
	/**
	 * An object of methods to convert from JavaScript's internal character
	 * representation (UCS-2) to Unicode code points, and back.
	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode
	 * @type Object
	 */
	'ucs2': {
		'decode': ucs2decode,
		'encode': ucs2encode
	},
	'decode': decode,
	'encode': encode,
	'toASCII': toASCII,
	'toUnicode': toUnicode
};
474
475 return punycode;
476 });
OLDNEW
« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698