third_party/js/punycode.js - Issue 848513004: Mojo JS Bindings: URL class - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(559)

My Issues | Starred Open | Closed | All

Side by Side Diff: third_party/js/punycode.js

Issue 848513004: Mojo JS Bindings: URL class (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*! http://mths.be/punycode v1.2.3 by @mathias */

	2 // Copyright Mathias Bynens <https://mathiasbynens.be/>

	3 //

	4 // Permission is hereby granted, free of charge, to any person obtaining

	5 // a copy of this software and associated documentation files (the

	6 // "Software"), to deal in the Software without restriction, including

	7 // without limitation the rights to use, copy, modify, merge, publish,

	8 // distribute, sublicense, and/or sell copies of the Software, and to

	9 // permit persons to whom the Software is furnished to do so, subject to

	10 // the following conditions:

	11 //

	12 // The above copyright notice and this permission notice shall be

	13 // included in all copies or substantial portions of the Software.

	14 //

	15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

	16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

	17 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

	18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

	19 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

	20 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

	21 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	22

	23 define(function() {

	24

	/**
	 * The `punycode` object. Declared here and assigned the public API object
	 * near the end of the module.
	 * @name punycode
	 * @type Object
	 */
	var punycode,

	/** Highest positive signed 32-bit integer value */
	maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

	/** Bootstring parameters */
	base = 36,
	tMin = 1,
	tMax = 26,
	skew = 38,
	damp = 700,
	initialBias = 72,
	initialN = 128, // 0x80
	delimiter = '-', // '\x2D'

	/** Regular expressions */
	regexPunycode = /^xn--/,
	regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
	// The alternation must use bare `|`; an escaped `\|` would match a literal
	// pipe character and the separators would never split a domain name.
	regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

	/** Error messages, keyed by the type names passed to `error()` */
	errors = {
		'overflow': 'Overflow: input needs wider integers to process',
		'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
		'invalid-input': 'Invalid input'
	},

	/** Convenience shortcuts */
	baseMinusTMin = base - tMin,
	floor = Math.floor,
	stringFromCharCode = String.fromCharCode;

	64

	65 /*--------------------------------------------------------------------------*/

	66

	/**
	 * A generic error utility function. It never returns normally.
	 * @private
	 * @param {String} type The error type; used as a key into the `errors` table.
	 * @throws {RangeError} Always, with the message stored under `errors[type]`.
	 */
	function error(type) {
		throw new RangeError(errors[type]);
	}

	76

	/**
	 * A generic `Array#map` utility function.
	 * @private
	 * @param {Array} array The array to iterate over. It is not modified.
	 * @param {Function} fn The function that gets called for every array
	 * item; it receives the item as its only argument.
	 * @returns {Array} A new array of values returned by `fn`.
	 */
	function map(array, fn) {
		var length = array.length,
		    result = [];
		// Write into a separate array so the caller's array is left untouched.
		// Items are visited from last to first.
		while (length--) {
			result[length] = fn(array[length]);
		}
		return result;
	}

	92

	/**
	 * A simple `Array#map`-like wrapper to work with domain name strings.
	 * The string is split on `regexSeparators`, each resulting part is passed
	 * through `fn`, and the results are joined back together with '.'.
	 * @private
	 * @param {String} string The domain name.
	 * @param {Function} fn The function that gets called for every part of
	 * the domain name between separators.
	 * @returns {String} A new string built from the values returned by `fn`,
	 * joined with '.'.
	 */
	function mapDomain(string, fn) {
		var labels = string.split(regexSeparators);
		return map(labels, fn).join('.');
	}

	105

	/**
	 * Creates an array containing the numeric code points of each Unicode
	 * character in the string. While JavaScript uses UCS-2 internally,
	 * this function will convert a pair of surrogate halves (each of which
	 * UCS-2 exposes as separate characters) into a single code point,
	 * matching UTF-16. A high surrogate that is not followed by a low
	 * surrogate is emitted on its own as a single value.
	 * @see `punycode.ucs2.encode`
	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode.ucs2
	 * @name decode
	 * @param {String} string The Unicode input string (UCS-2).
	 * @returns {Array} The new array of code points.
	 */
	function ucs2decode(string) {
		var output = [],
		    counter = 0,
		    length = string.length,
		    value,
		    extra;
		while (counter < length) {
			value = string.charCodeAt(counter++);
			if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
				// high surrogate, and there is a next character
				extra = string.charCodeAt(counter++);
				if ((extra & 0xFC00) === 0xDC00) { // low surrogate
					output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
				} else {
					// unmatched surrogate; only append this code unit, in case the next
					// code unit is the high surrogate of a surrogate pair
					output.push(value);
					counter--;
				}
			} else {
				output.push(value);
			}
		}
		return output;
	}

	144

	/**
	 * Creates a string based on an array of numeric code points. Values
	 * above 0xFFFF are written as a surrogate pair. The input array is only
	 * read, never modified.
	 * @see `punycode.ucs2.decode`
	 * @memberOf punycode.ucs2
	 * @name encode
	 * @param {Array} array The array of numeric code points.
	 * @returns {String} The new Unicode string (UCS-2).
	 */
	function ucs2encode(array) {
		var output = '',
		    length = array.length,
		    index,
		    value;
		for (index = 0; index < length; ++index) {
			value = array[index];
			if (value > 0xFFFF) {
				// Split into a high surrogate (top 10 bits) followed by a
				// low surrogate (bottom 10 bits).
				value -= 0x10000;
				output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
				value = 0xDC00 | value & 0x3FF;
			}
			output += stringFromCharCode(value);
		}
		return output;
	}

	165

	/**
	 * Converts a basic code point into a digit/integer.
	 * @see `digitToBasic()`
	 * @private
	 * @param {Number} codePoint The basic numeric code point value.
	 * @returns {Number} The numeric value of a basic code point (for use in
	 * representing integers) in the range `0` to `base - 1`, or `base` if
	 * the code point does not represent a value.
	 */
	function basicToDigit(codePoint) {
		// Both bounds are checked explicitly. A single `codePoint - 48 < 10`
		// style test is also true for negative differences in JavaScript, which
		// would let characters such as '-', '@' or '_' pass as digits.
		if (codePoint >= 48 && codePoint < 58) { // '0'..'9' map to 26..35
			return codePoint - 22;
		}
		if (codePoint >= 65 && codePoint < 91) { // 'A'..'Z' map to 0..25
			return codePoint - 65;
		}
		if (codePoint >= 97 && codePoint < 123) { // 'a'..'z' map to 0..25
			return codePoint - 97;
		}
		return base;
	}

	187

	/**
	 * Converts a digit/integer into a basic code point.
	 * @see `basicToDigit()`
	 * @private
	 * @param {Number} digit The numeric value of a basic code point.
	 * @param {*} flag Selects the letter case: when `flag != 0` the result is
	 * shifted down by 32 (the uppercase form), otherwise it is left as is.
	 * @returns {Number} The basic code point whose value (when used for
	 * representing integers) is `digit`, which needs to be in the range
	 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
	 * used; else, the lowercase form is used. The behavior is undefined
	 * if `flag` is non-zero and `digit` has no uppercase form.
	 */
	function digitToBasic(digit, flag) {
		//  0..25 map to ASCII a..z or A..Z
		// 26..35 map to ASCII 0..9
		var codePoint = digit < 26 ? digit + 97 : digit + 22;
		// Loose comparison kept on purpose so every `flag` value selects the
		// same case as before.
		if (flag != 0) {
			codePoint -= 32;
		}
		return codePoint;
	}

	204

	/**
	 * Bias adaptation function as per section 3.4 of RFC 3492.
	 * http://tools.ietf.org/html/rfc3492#section-3.4
	 * @private
	 * @param {Number} delta The delta that was just encoded or decoded.
	 * @param {Number} numPoints The divisor used to scale `delta` up; callers
	 * in this file pass the number of code points handled so far plus one.
	 * @param {Boolean} firstTime When truthy, `delta` is divided by `damp`;
	 * otherwise it is halved.
	 * @returns {Number} The new bias.
	 */
	function adapt(delta, numPoints, firstTime) {
		var k = 0;
		delta = firstTime ? floor(delta / damp) : delta >> 1;
		delta += floor(delta / numPoints);
		// `*` binds tighter than `>>`, so the threshold is
		// (baseMinusTMin * tMax) >> 1.
		for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
			delta = floor(delta / baseMinusTMin);
		}
		return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
	}

	219

	/**
	 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
	 * symbols.
	 * @memberOf punycode
	 * @param {String} input The Punycode string of ASCII-only symbols.
	 * @returns {String} The resulting string of Unicode symbols.
	 * @throws {RangeError} Via `error()`: 'not-basic' when a code unit >= 0x80
	 * appears before the last delimiter, 'invalid-input' when the input ends
	 * in the middle of a variable-length integer or contains a character that
	 * is not a digit, and 'overflow' when a value would exceed `maxInt`.
	 */
	function decode(input) {
		// Don't use UCS-2
		var output = [],
		    inputLength = input.length,
		    out,
		    i = 0,
		    n = initialN,
		    bias = initialBias,
		    basic,
		    j,
		    index,
		    oldi,
		    w,
		    k,
		    digit,
		    t,
		    /** Cached calculation results */
		    baseMinusT;

		// Handle the basic code points: let `basic` be the number of input code
		// points before the last delimiter, or `0` if there is none, then copy
		// the first basic code points to the output.

		basic = input.lastIndexOf(delimiter);
		if (basic < 0) {
			basic = 0;
		}

		for (j = 0; j < basic; ++j) {
			// if it's not a basic code point
			if (input.charCodeAt(j) >= 0x80) {
				error('not-basic');
			}
			output.push(input.charCodeAt(j));
		}

		// Main decoding loop: start just after the last delimiter if any basic code
		// points were copied; start at the beginning otherwise.

		for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

			// `index` is the index of the next character to be consumed.
			// Decode a generalized variable-length integer into `delta`,
			// which gets added to `i`. The overflow checking is easier
			// if we increase `i` as we go, then subtract off its starting
			// value at the end to obtain `delta`.
			for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

				if (index >= inputLength) {
					error('invalid-input');
				}

				digit = basicToDigit(input.charCodeAt(index++));

				// `basicToDigit` returns `base` for a character that is not a
				// digit; that is malformed input, not an arithmetic overflow.
				if (digit >= base) {
					error('invalid-input');
				}

				if (digit > floor((maxInt - i) / w)) {
					error('overflow');
				}

				i += digit * w;
				t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

				if (digit < t) {
					break;
				}

				baseMinusT = base - t;
				if (w > floor(maxInt / baseMinusT)) {
					error('overflow');
				}

				w *= baseMinusT;

			}

			out = output.length + 1;
			bias = adapt(i - oldi, out, oldi === 0);

			// `i` was supposed to wrap around from `out` to `0`,
			// incrementing `n` each time, so we'll fix that now:
			if (floor(i / out) > maxInt - n) {
				error('overflow');
			}

			n += floor(i / out);
			i %= out;

			// Insert `n` at position `i` of the output
			output.splice(i++, 0, n);

		}

		return ucs2encode(output);
	}

	320

	/**
	 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
	 * symbols.
	 * @memberOf punycode
	 * @param {String} input The string of Unicode symbols.
	 * @returns {String} The resulting Punycode string of ASCII-only symbols.
	 * @throws {RangeError} Via `error('overflow')` when `delta` would exceed
	 * `maxInt`.
	 */
	function encode(input) {
		var n,
		    delta,
		    handledCPCount,
		    basicLength,
		    bias,
		    j,
		    m,
		    q,
		    k,
		    t,
		    currentValue,
		    output = [],
		    /** `inputLength` will hold the number of code points in `input`. */
		    inputLength,
		    /** Cached calculation results */
		    handledCPCountPlusOne,
		    baseMinusT,
		    qMinusT;

		// Convert the input in UCS-2 to Unicode
		input = ucs2decode(input);

		// Cache the length
		inputLength = input.length;

		// Initialize the state
		n = initialN;
		delta = 0;
		bias = initialBias;

		// Handle the basic code points
		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue < 0x80) {
				output.push(stringFromCharCode(currentValue));
			}
		}

		handledCPCount = basicLength = output.length;

		// `handledCPCount` is the number of code points that have been handled;
		// `basicLength` is the number of basic code points.

		// Finish the basic string - if it is not empty - with a delimiter
		if (basicLength) {
			output.push(delimiter);
		}

		// Main encoding loop:
		while (handledCPCount < inputLength) {

			// All non-basic code points < n have been handled already. Find the next
			// larger one:
			for (m = maxInt, j = 0; j < inputLength; ++j) {
				currentValue = input[j];
				if (currentValue >= n && currentValue < m) {
					m = currentValue;
				}
			}

			// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
			// but guard against overflow
			handledCPCountPlusOne = handledCPCount + 1;
			if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
				error('overflow');
			}

			delta += (m - n) * handledCPCountPlusOne;
			n = m;

			for (j = 0; j < inputLength; ++j) {
				currentValue = input[j];

				if (currentValue < n && ++delta > maxInt) {
					error('overflow');
				}

				if (currentValue === n) {
					// Represent delta as a generalized variable-length integer
					for (q = delta, k = base; /* no condition */; k += base) {
						t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
						if (q < t) {
							break;
						}
						qMinusT = q - t;
						baseMinusT = base - t;
						output.push(
							stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
						);
						q = floor(qMinusT / baseMinusT);
					}

					output.push(stringFromCharCode(digitToBasic(q, 0)));
					bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
					delta = 0;
					++handledCPCount;
				}
			}

			++delta;
			++n;

		}
		return output.join('');
	}

	434

	/**
	 * Converts a Punycode string representing a domain name to Unicode. Only the
	 * Punycoded parts of the domain name will be converted, i.e. it doesn't
	 * matter if you call it on a string that has already been converted to
	 * Unicode. A part counts as Punycoded when it matches `regexPunycode`; its
	 * first four characters are dropped and the rest is lowercased before
	 * being passed to `decode`.
	 * @memberOf punycode
	 * @param {String} domain The Punycode domain name to convert to Unicode.
	 * @returns {String} The Unicode representation of the given Punycode
	 * string.
	 */
	function toUnicode(domain) {
		return mapDomain(domain, function(label) {
			if (regexPunycode.test(label)) {
				return decode(label.slice(4).toLowerCase());
			}
			return label;
		});
	}

	452

	/**
	 * Converts a Unicode string representing a domain name to Punycode. Only the
	 * non-ASCII parts of the domain name will be converted, i.e. it doesn't
	 * matter if you call it with a domain that's already in ASCII. A part is
	 * converted when it matches `regexNonASCII`; the result is 'xn--' followed
	 * by the output of `encode`.
	 * @memberOf punycode
	 * @param {String} domain The domain name to convert, as a Unicode string.
	 * @returns {String} The Punycode representation of the given domain name.
	 */
	function toASCII(domain) {
		return mapDomain(domain, function(label) {
			if (regexNonASCII.test(label)) {
				return 'xn--' + encode(label);
			}
			return label;
		});
	}

	468

	469 /*--------------------------------------------------------------------------*/

	470

	/** Define the public API */
	punycode = {
		/**
		 * A string representing the current Punycode.js version number.
		 * @memberOf punycode
		 * @type String
		 */
		'version': '1.2.3',
		/**
		 * An object of methods to convert from JavaScript's internal character
		 * representation (UCS-2) to Unicode code points, and back.
		 * @see <http://mathiasbynens.be/notes/javascript-encoding>
		 * @memberOf punycode
		 * @type Object
		 */
		'ucs2': {
			'decode': ucs2decode,
			'encode': ucs2encode
		},
		// Conversions for a single Punycode string
		'decode': decode,
		'encode': encode,
		// Conversions for whole domain names, applied part by part
		'toASCII': toASCII,
		'toUnicode': toUnicode
	};

	495

	496 return punycode;

	497 });

OLD	NEW

« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »

Powered by Google App Engine

This is Rietveld 408576698