Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(559)

Side by Side Diff: third_party/js/punycode.js

Issue 848513004: Mojo JS Bindings: URL class (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*! http://mths.be/punycode v1.2.3 by @mathias */
2 // Copyright Mathias Bynens <https://mathiasbynens.be/>
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining
5 // a copy of this software and associated documentation files (the
6 // "Software"), to deal in the Software without restriction, including
7 // without limitation the rights to use, copy, modify, merge, publish,
8 // distribute, sublicense, and/or sell copies of the Software, and to
9 // permit persons to whom the Software is furnished to do so, subject to
10 // the following conditions:
11 //
12 // The above copyright notice and this permission notice shall be
13 // included in all copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
23 define(function() {
24
/**
 * The `punycode` object. Declared here and assigned the public API once all
 * helpers below have been defined.
 * @name punycode
 * @type Object
 */
var punycode,

/** Highest positive signed 32-bit integer value */
maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters (RFC 3492, section 5) */
base = 36,
tMin = 1,
tMax = 26,
skew = 38,
damp = 700,
initialBias = 72,
initialN = 128, // 0x80
delimiter = '-', // '\x2D'

/** Regular expressions */
regexPunycode = /^xn--/,
regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

/** Error messages, keyed by the error type passed to `error()` */
errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
},

/** Convenience shortcuts */
baseMinusTMin = base - tMin,
floor = Math.floor,
stringFromCharCode = String.fromCharCode;
64
65 /*--------------------------------------------------------------------------*/
66
/**
 * Raises the error registered under the given type.
 * @private
 * @param {String} type The error type; used as a key into `errors`.
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
	var message = errors[type];
	throw new RangeError(message);
}
76
/**
 * A generic `Array#map` utility function.
 * @private
 * @param {Array} array The array to iterate over. It is not modified.
 * @param {Function} fn The function that gets called for every array item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, fn) {
	var length = array.length,
	    result = [];
	// Walk from the last item to the first (same callback order as before),
	// but write into a fresh array so the caller's array is left untouched.
	while (length--) {
		result[length] = fn(array[length]);
	}
	return result;
}
92
/**
 * A simple `Array#map`-like wrapper to work with domain name strings.
 * The string is split on the RFC 3490 separators, the callback is applied to
 * each resulting label, and the results are re-joined with `.`.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} A new string made of the labels returned by the callback
 * function, joined by `.`.
 */
function mapDomain(string, fn) {
	var labels = string.split(regexSeparators);
	var converted = map(labels, fn);
	return converted.join('.');
}
105
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	var output = [],
	    counter = 0,
	    length = string.length,
	    value,
	    extra;
	while (counter < length) {
		value = string.charCodeAt(counter++);
		if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
			// high surrogate, and there is a next character
			extra = string.charCodeAt(counter++);
			if ((extra & 0xFC00) === 0xDC00) { // low surrogate
				output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
			} else {
				// unmatched surrogate; only append this code unit, in case the
				// next code unit is the high surrogate of a surrogate pair
				output.push(value);
				counter--;
			}
		} else {
			output.push(value);
		}
	}
	return output;
}
144
/**
 * Creates a string based on an array of numeric code points.
 * Code points above 0xFFFF are emitted as a surrogate pair.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points. It is not modified.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
	var output = '',
	    length = array.length,
	    index,
	    value;
	// Build the string directly instead of mapping over `array`, so the
	// caller's code point array is never overwritten with string fragments.
	for (index = 0; index < length; ++index) {
		value = array[index];
		if (value > 0xFFFF) {
			value -= 0x10000;
			// high surrogate: top 10 bits of the offset
			output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
			// low surrogate: bottom 10 bits of the offset
			value = 0xDC00 | value & 0x3FF;
		}
		output += stringFromCharCode(value);
	}
	return output;
}
165
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
	// Both bounds are checked explicitly: JavaScript numbers do not wrap
	// around like unsigned integers, so `codePoint - 48 < 10` alone would
	// also accept every code point below '0'.
	if (codePoint >= 48 && codePoint < 58) { // '0'..'9' map to 26..35
		return codePoint - 22;
	}
	if (codePoint >= 65 && codePoint < 91) { // 'A'..'Z' map to 0..25
		return codePoint - 65;
	}
	if (codePoint >= 97 && codePoint < 123) { // 'a'..'z' map to 0..25
		return codePoint - 97;
	}
	return base;
}
187
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point; expected
 * to be in the range `0` to `base - 1`.
 * @param {Number} flag If non-zero, the uppercase form is used; else, the
 * lowercase form is used. The comparison is a loose `!= 0`, so an omitted
 * flag counts as non-zero. The behavior is undefined if `flag` is non-zero
 * and `digit` has no uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
function digitToBasic(digit, flag) {
	//  0..25 map to ASCII a..z or A..Z
	// 26..35 map to ASCII 0..9
	var codePoint = digit + 22;
	if (digit < 26) {
		codePoint += 75;
	}
	if (flag != 0) {
		// uppercase letters sit 32 code points below their lowercase forms
		codePoint -= 32;
	}
	return codePoint;
}
204
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor applied to the scaled delta; callers
 * pass the number of code points handled so far plus one.
 * @param {Boolean} firstTime When truthy, `delta` is divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
	var threshold = baseMinusTMin * tMax >> 1,
	    k = 0;
	if (firstTime) {
		delta = floor(delta / damp);
	} else {
		delta = delta >> 1;
	}
	delta += floor(delta / numPoints);
	while (delta > threshold) {
		delta = floor(delta / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}
219
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} If the part before the last delimiter contains a
 * code point >= 0x80, if the input ends in the middle of a variable-length
 * integer or contains a character that is not a base-36 digit, or if a
 * value does not fit in `maxInt`.
 */
function decode(input) {
	// Don't use UCS-2
	var output = [],
	    inputLength = input.length,
	    out,
	    i = 0,
	    n = initialN,
	    bias = initialBias,
	    basic,
	    j,
	    index,
	    oldi,
	    w,
	    k,
	    digit,
	    t,
	    /** Cached calculation results */
	    baseMinusT;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (j = 0; j < basic; ++j) {
		// if it's not a basic code point
		if (input.charCodeAt(j) >= 0x80) {
			error('not-basic');
		}
		output.push(input.charCodeAt(j));
	}

	// Main decoding loop: start just after the last delimiter if any basic
	// code points were copied; start at the beginning otherwise.

	for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow, so report it as such.
			if (digit >= base) {
				error('invalid-input');
			}

			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			if (digit < t) {
				break;
			}

			baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output
		output.splice(i++, 0, n);

	}

	return ucs2encode(output);
}
320
/**
 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
 * symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} If a delta does not fit in `maxInt`.
 */
function encode(input) {
	var n,
	    delta,
	    handledCPCount,
	    basicLength,
	    bias,
	    j,
	    m,
	    q,
	    k,
	    t,
	    currentValue,
	    output = [],
	    /** `inputLength` will hold the number of code points in `input`. */
	    inputLength,
	    /** Cached calculation results */
	    handledCPCountPlusOne,
	    baseMinusT,
	    qMinusT;

	// Convert the input in UCS-2 to Unicode
	input = ucs2decode(input);

	// Cache the length
	inputLength = input.length;

	// Initialize the state
	n = initialN;
	delta = 0;
	bias = initialBias;

	// Handle the basic code points
	for (j = 0; j < inputLength; ++j) {
		currentValue = input[j];
		if (currentValue < 0x80) {
			output.push(stringFromCharCode(currentValue));
		}
	}

	handledCPCount = basicLength = output.length;

	// `handledCPCount` is the number of code points that have been handled;
	// `basicLength` is the number of basic code points.

	// Finish the basic string - if it is not empty - with a delimiter
	if (basicLength) {
		output.push(delimiter);
	}

	// Main encoding loop:
	while (handledCPCount < inputLength) {

		// All non-basic code points < n have been handled already. Find the
		// next larger one:
		for (m = maxInt, j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue >= n && currentValue < m) {
				m = currentValue;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to
		// <m,0>, but guard against overflow
		handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];

			if (currentValue < n && ++delta > maxInt) {
				error('overflow');
			}

			if (currentValue === n) {
				// Represent delta as a generalized variable-length integer
				for (q = delta, k = base; /* no condition */; k += base) {
					t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
					if (q < t) {
						break;
					}
					qMinusT = q - t;
					baseMinusT = base - t;
					output.push(
						stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
					);
					q = floor(qMinusT / baseMinusT);
				}

				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
				delta = 0;
				++handledCPCount;
			}
		}

		++delta;
		++n;

	}
	return output.join('');
}
434
/**
 * Converts a Punycode string representing a domain name to Unicode. Only the
 * Punycoded parts of the domain name (labels starting with `xn--`) will be
 * converted, i.e. it doesn't matter if you call it on a string that has
 * already been converted to Unicode.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
	return mapDomain(domain, function(label) {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix and lowercase before decoding.
		return decode(label.slice(4).toLowerCase());
	});
}
452
/**
 * Converts a Unicode string representing a domain name to Punycode. Only the
 * non-ASCII parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it with a domain that's already in ASCII.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
	return mapDomain(domain, function(label) {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	});
}
468
469 /*--------------------------------------------------------------------------*/
470
/** Define the public API */
punycode = {};

/**
 * A string representing the current Punycode.js version number.
 * @memberOf punycode
 * @type String
 */
punycode['version'] = '1.2.3';

/**
 * An object of methods to convert from JavaScript's internal character
 * representation (UCS-2) to Unicode code points, and back.
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode
 * @type Object
 */
punycode['ucs2'] = {
	'decode': ucs2decode,
	'encode': ucs2encode
};

// Punycode string <-> Unicode string conversions, then the domain-name
// level wrappers built on top of them.
punycode['decode'] = decode;
punycode['encode'] = encode;
punycode['toASCII'] = toASCII;
punycode['toUnicode'] = toUnicode;
495
496 return punycode;
497 });
OLDNEW
« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698