third_party/js/punycode.js - Issue 848513004: Mojo JS Bindings: URL class - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(233)

My Issues | Starred Open | Closed | All

Side by Side Diff: third_party/js/punycode.js

Issue 848513004: Mojo JS Bindings: URL class (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*! http://mths.be/punycode v1.2.3 by @mathias */
	abarth-chromium 2015/01/13 20:32:35 Is there a license for this code?
	2 define(function() {

	3

	/**
	 * The `punycode` object. Declared here and assigned once the public API
	 * is assembled at the end of this module.
	 * @name punycode
	 * @type Object
	 */
	var punycode,

	/** Highest positive signed 32-bit integer value */
	maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

	/** Bootstring parameters */
	base = 36,
	tMin = 1,
	tMax = 26,
	skew = 38,
	damp = 700,
	initialBias = 72,
	initialN = 128, // 0x80
	delimiter = '-', // '\x2D'

	/** Regular expressions */
	regexPunycode = /^xn--/,
	regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
	// The alternation must use bare `|`; an escaped `\|` would only match a
	// literal pipe-joined sequence and never split on a single separator.
	regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

	/** Error messages, keyed by the type passed to `error()` */
	errors = {
		'overflow': 'Overflow: input needs wider integers to process',
		'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
		'invalid-input': 'Invalid input'
	},

	/** Convenience shortcuts */
	baseMinusTMin = base - tMin,
	floor = Math.floor,
	stringFromCharCode = String.fromCharCode,

	/** Temporary variable (not referenced in the visible part of this module) */
	key;

	43

	44 /*--------------------------------------------------------------------------*/

	45

	/**
	 * Raises the error registered under `type` in the `errors` table.
	 * @private
	 * @param {String} type The error type (a key of `errors`).
	 * @throws {RangeError} Always; the message is `errors[type]`.
	 */
	function error(type) {
		var message = errors[type];
		throw new RangeError(message);
	}

	55

	/**
	 * A generic `Array#map` utility function.
	 *
	 * The input array is left untouched; results are collected into a fresh
	 * array so that callers (e.g. `punycode.ucs2.encode`) do not have their
	 * argument overwritten.
	 * @private
	 * @param {Array} array The array to iterate over.
	 * @param {Function} fn The function that gets called for every array
	 * item; it receives the item as its only argument.
	 * @returns {Array} A new array of values returned by `fn`.
	 */
	function map(array, fn) {
		var length = array.length,
			result = [];
		// Iterate from the last item to the first, filling the same index in
		// the result array.
		while (length--) {
			result[length] = fn(array[length]);
		}
		return result;
	}

	71

	/**
	 * A simple `Array#map`-like wrapper to work with domain name strings:
	 * the string is split on the separators matched by `regexSeparators`,
	 * each piece is passed through `fn`, and the results are re-joined
	 * with '.'.
	 * @private
	 * @param {String} string The domain name.
	 * @param {Function} fn The function that gets called for every label.
	 * @returns {String} A new string built from the values returned by `fn`.
	 */
	function mapDomain(string, fn) {
		var labels = string.split(regexSeparators);
		var converted = map(labels, fn);
		return converted.join('.');
	}

	84

	/**
	 * Creates an array containing the numeric code points of each Unicode
	 * character in the string. While JavaScript uses UCS-2 internally,
	 * this function will convert a pair of surrogate halves (each of which
	 * UCS-2 exposes as separate characters) into a single code point,
	 * matching UTF-16.
	 * @see `punycode.ucs2.encode`
	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode.ucs2
	 * @name decode
	 * @param {String} string The Unicode input string (UCS-2).
	 * @returns {Array} The new array of code points. Unpaired surrogates
	 * are emitted unchanged as their own entries.
	 */
	function ucs2decode(string) {
		var output = [],
			counter = 0,
			length = string.length,
			value,
			extra;
		while (counter < length) {
			value = string.charCodeAt(counter++);
			if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
				// high surrogate, and there is a next character
				extra = string.charCodeAt(counter++);
				if ((extra & 0xFC00) == 0xDC00) { // low surrogate
					output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
				} else {
					// unmatched surrogate; only append this code unit, in case the next
					// code unit is the high surrogate of a surrogate pair
					output.push(value);
					counter--;
				}
			} else {
				output.push(value);
			}
		}
		return output;
	}

	123

	/**
	 * Creates a string based on an array of numeric code points. Code points
	 * above 0xFFFF are written as a surrogate pair. The input array is only
	 * read, never modified.
	 * @see `punycode.ucs2.decode`
	 * @memberOf punycode.ucs2
	 * @name encode
	 * @param {Array} array The array of numeric code points.
	 * @returns {String} The new Unicode string (UCS-2).
	 */
	function ucs2encode(array) {
		var output = '',
			length = array.length,
			index,
			value;
		for (index = 0; index < length; ++index) {
			value = array[index];
			if (value > 0xFFFF) {
				// Split a supplementary code point into high + low surrogate.
				value -= 0x10000;
				output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
				value = 0xDC00 | value & 0x3FF;
			}
			output += stringFromCharCode(value);
		}
		return output;
	}

	144

	/**
	 * Converts a basic code point into a digit/integer.
	 * @see `digitToBasic()`
	 * @private
	 * @param {Number} codePoint The basic numeric code point value.
	 * @returns {Number} The numeric value of a basic code point (for use in
	 * representing integers) in the range `0` to `base - 1`, or `base` if
	 * the code point does not represent a value.
	 */
	function basicToDigit(codePoint) {
		// Each range needs both bounds: with only an upper bound, code points
		// below the range (e.g. '-' or ':') would yield bogus or negative
		// digits instead of `base`.
		if (codePoint >= 48 && codePoint < 58) { // '0'..'9' -> 26..35
			return codePoint - 22;
		}
		if (codePoint >= 65 && codePoint < 91) { // 'A'..'Z' -> 0..25
			return codePoint - 65;
		}
		if (codePoint >= 97 && codePoint < 123) { // 'a'..'z' -> 0..25
			return codePoint - 97;
		}
		return base;
	}

	166

	/**
	 * Converts a digit/integer into a basic code point.
	 * @see `basicToDigit()`
	 * @private
	 * @param {Number} digit The numeric value of a basic code point; expected
	 * to be in the range `0` to `base - 1`.
	 * @param {*} flag If non-zero, the uppercase form is used; else, the
	 * lowercase form is used. The behavior is undefined if `flag` is
	 * non-zero and `digit` has no uppercase form.
	 * @returns {Number} The basic code point whose value (when used for
	 * representing integers) is `digit`.
	 */
	function digitToBasic(digit, flag) {
		// 0..25 map to ASCII a..z or A..Z
		// 26..35 map to ASCII 0..9
		var letterOffset = 75 * (digit < 26);
		var upperCaseShift = (flag != 0) << 5;
		return digit + 22 + letterOffset - upperCaseShift;
	}

	183

	/**
	 * Bias adaptation function as per section 3.4 of RFC 3492.
	 * http://tools.ietf.org/html/rfc3492#section-3.4
	 * @private
	 * @param {Number} delta The delta that was just encoded or decoded.
	 * @param {Number} numPoints The divisor applied to the scaled delta
	 * (callers pass the number of code points handled so far, plus one).
	 * @param {Boolean} firstTime Truthy for the first adaptation, in which
	 * case `delta` is divided by `damp` instead of being halved.
	 * @returns {Number} The new bias.
	 */
	function adapt(delta, numPoints, firstTime) {
		var k = 0;
		delta = firstTime ? floor(delta / damp) : delta >> 1;
		delta += floor(delta / numPoints);
		for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
			delta = floor(delta / baseMinusTMin);
		}
		return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
	}

	198

	/**
	 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
	 * symbols.
	 * @memberOf punycode
	 * @param {String} input The Punycode string of ASCII-only symbols.
	 * @returns {String} The resulting string of Unicode symbols.
	 * @throws {RangeError} Raised through `error()`: 'not-basic' when a code
	 * unit >= 0x80 precedes the last delimiter, 'invalid-input' when the
	 * input ends in the middle of a variable-length integer, and 'overflow'
	 * when a digit is not a valid base-36 digit or a value would exceed
	 * `maxInt`.
	 */
	function decode(input) {
		// Don't use UCS-2
		var output = [],
			inputLength = input.length,
			out,
			i = 0,
			n = initialN,
			bias = initialBias,
			basic,
			j,
			index,
			oldi,
			w,
			k,
			digit,
			t,
			/** Cached calculation results */
			baseMinusT;

		// Handle the basic code points: let `basic` be the number of input code
		// points before the last delimiter, or `0` if there is none, then copy
		// the first basic code points to the output.

		basic = input.lastIndexOf(delimiter);
		if (basic < 0) {
			basic = 0;
		}

		for (j = 0; j < basic; ++j) {
			// if it's not a basic code point
			if (input.charCodeAt(j) >= 0x80) {
				error('not-basic');
			}
			output.push(input.charCodeAt(j));
		}

		// Main decoding loop: start just after the last delimiter if any basic code
		// points were copied; start at the beginning otherwise.

		for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

			// `index` is the index of the next character to be consumed.
			// Decode a generalized variable-length integer into `delta`,
			// which gets added to `i`. The overflow checking is easier
			// if we increase `i` as we go, then subtract off its starting
			// value at the end to obtain `delta`.
			for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

				if (index >= inputLength) {
					error('invalid-input');
				}

				digit = basicToDigit(input.charCodeAt(index++));

				if (digit >= base || digit > floor((maxInt - i) / w)) {
					error('overflow');
				}

				i += digit * w;
				t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

				if (digit < t) {
					break;
				}

				baseMinusT = base - t;
				if (w > floor(maxInt / baseMinusT)) {
					error('overflow');
				}

				w *= baseMinusT;

			}

			out = output.length + 1;
			bias = adapt(i - oldi, out, oldi == 0);

			// `i` was supposed to wrap around from `out` to `0`,
			// incrementing `n` each time, so we'll fix that now:
			if (floor(i / out) > maxInt - n) {
				error('overflow');
			}

			n += floor(i / out);
			i %= out;

			// Insert `n` at position `i` of the output
			output.splice(i++, 0, n);

		}

		return ucs2encode(output);
	}

	299

	/**
	 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
	 * symbols.
	 * @memberOf punycode
	 * @param {String} input The string of Unicode symbols.
	 * @returns {String} The resulting Punycode string of ASCII-only symbols.
	 * @throws {RangeError} Raised through `error('overflow')` when `delta`
	 * would exceed `maxInt`.
	 */
	function encode(input) {
		var n,
			delta,
			handledCPCount,
			basicLength,
			bias,
			j,
			m,
			q,
			k,
			t,
			currentValue,
			output = [],
			/** `inputLength` will hold the number of code points in `input`. */
			inputLength,
			/** Cached calculation results */
			handledCPCountPlusOne,
			baseMinusT,
			qMinusT;

		// Convert the input in UCS-2 to Unicode
		input = ucs2decode(input);

		// Cache the length
		inputLength = input.length;

		// Initialize the state
		n = initialN;
		delta = 0;
		bias = initialBias;

		// Handle the basic code points
		for (j = 0; j < inputLength; ++j) {
			currentValue = input[j];
			if (currentValue < 0x80) {
				output.push(stringFromCharCode(currentValue));
			}
		}

		handledCPCount = basicLength = output.length;

		// `handledCPCount` is the number of code points that have been handled;
		// `basicLength` is the number of basic code points.

		// Finish the basic string - if it is not empty - with a delimiter
		if (basicLength) {
			output.push(delimiter);
		}

		// Main encoding loop:
		while (handledCPCount < inputLength) {

			// All non-basic code points < n have been handled already. Find the next
			// larger one:
			for (m = maxInt, j = 0; j < inputLength; ++j) {
				currentValue = input[j];
				if (currentValue >= n && currentValue < m) {
					m = currentValue;
				}
			}

			// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
			// but guard against overflow
			handledCPCountPlusOne = handledCPCount + 1;
			if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
				error('overflow');
			}

			delta += (m - n) * handledCPCountPlusOne;
			n = m;

			for (j = 0; j < inputLength; ++j) {
				currentValue = input[j];

				if (currentValue < n && ++delta > maxInt) {
					error('overflow');
				}

				if (currentValue == n) {
					// Represent delta as a generalized variable-length integer
					for (q = delta, k = base; /* no condition */; k += base) {
						t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
						if (q < t) {
							break;
						}
						qMinusT = q - t;
						baseMinusT = base - t;
						output.push(
							stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
						);
						q = floor(qMinusT / baseMinusT);
					}

					output.push(stringFromCharCode(digitToBasic(q, 0)));
					bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
					delta = 0;
					++handledCPCount;
				}
			}

			++delta;
			++n;

		}
		return output.join('');
	}

	413

	/**
	 * Converts a Punycode string representing a domain name to Unicode. Only
	 * the labels matching `regexPunycode` (an `xn--` prefix) are decoded, so
	 * labels that are already Unicode pass through unchanged.
	 * @memberOf punycode
	 * @param {String} domain The Punycode domain name to convert to Unicode.
	 * @returns {String} The Unicode representation of the given Punycode
	 * string.
	 */
	function toUnicode(domain) {
		return mapDomain(domain, function(label) {
			if (regexPunycode.test(label)) {
				// Drop the four-character `xn--` prefix before decoding.
				return decode(label.slice(4).toLowerCase());
			}
			return label;
		});
	}

	431

	/**
	 * Converts a Unicode string representing a domain name to Punycode. Only
	 * the labels matching `regexNonASCII` are encoded (and prefixed with
	 * `xn--`), so labels that are already plain ASCII pass through unchanged.
	 * @memberOf punycode
	 * @param {String} domain The domain name to convert, as a Unicode string.
	 * @returns {String} The Punycode representation of the given domain name.
	 */
	function toASCII(domain) {
		return mapDomain(domain, function(label) {
			if (regexNonASCII.test(label)) {
				return 'xn--' + encode(label);
			}
			return label;
		});
	}

	447

	448 /*--------------------------------------------------------------------------*/

	449

	450 /** Define the public API */

	451 punycode = {

	452 /**

	453 * A string representing the current Punycode.js version number.

	454 * @memberOf punycode

	455 * @type String

	456 */

	457 'version': '1.2.3',

	458 /**

	459 * An object of methods to convert from JavaScript's internal ch aracter

	460 * representation (UCS-2) to Unicode code points, and back.

	461 * @see <http://mathiasbynens.be/notes/javascript-encoding>

	462 * @memberOf punycode

	463 * @type Object

	464 */

	465 'ucs2': {

	466 'decode': ucs2decode,

	467 'encode': ucs2encode

	468 },

	469 'decode': decode,

	470 'encode': encode,

	471 'toASCII': toASCII,

	472 'toUnicode': toUnicode

	473 };

	474

	475 return punycode;

	476 });

OLD	NEW

« no previous file with comments | « third_party/js/README.md ('k') | third_party/js/querystring.js » ('j') | no next file with comments »

Powered by Google App Engine

This is Rietveld 408576698