OLD | NEW |
(Empty) | |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are |
| 4 // met: |
| 5 // |
| 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided |
| 11 // with the distribution. |
| 12 // * Neither the name of Google Inc. nor the names of its |
| 13 // contributors may be used to endorse or promote products derived |
| 14 // from this software without specific prior written permission. |
| 15 // |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 // limitations under the License. |
| 28 |
| 29 // ECMAScript 402 API implementation is broken into separate files for |
| 30 // each service. The build system combines them together into one |
| 31 // Intl namespace. |
| 32 |
| 33 /** |
| 34 * Canonicalizes the language tag, or throws in case the tag is invalid. |
| 35 */ |
| 36 function canonicalizeLanguageTag(localeID) { |
| 37 native function NativeJSCanonicalizeLanguageTag(); |
| 38 |
| 39 // null is typeof 'object' so we have to do extra check. |
| 40 if (typeof localeID !== 'string' && typeof localeID !== 'object' || |
| 41 localeID === null) { |
| 42 throw new TypeError('Language ID should be string or object.'); |
| 43 } |
| 44 |
| 45 var localeString = String(localeID); |
| 46 |
| 47 if (isValidLanguageTag(localeString) === false) { |
| 48 throw new RangeError('Invalid language tag: ' + localeString); |
| 49 } |
| 50 |
| 51 // This call will strip -kn but not -kn-true extensions. |
| 52 // ICU bug filled - http://bugs.icu-project.org/trac/ticket/9265. |
| 53 // TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after |
| 54 // upgrade to ICU 4.9. |
| 55 var tag = NativeJSCanonicalizeLanguageTag(localeString); |
| 56 if (tag === 'invalid-tag') { |
| 57 throw new RangeError('Invalid language tag: ' + localeString); |
| 58 } |
| 59 |
| 60 return tag; |
| 61 } |
| 62 |
| 63 |
/**
 * Builds a frozen list of canonicalized locales without duplicates.
 *
 * Accepts undefined (yields an empty list), a single string locale ID, or an
 * array-like object whose present indices are canonicalized in order.
 * Canonicalization errors from canonicalizeLanguageTag() propagate, so
 * locales that are not well formed BCP47 tags cause a throw.
 *
 * @param {undefined|string|Object} locales Requested locale(s).
 * @return {Array} Result of freezeArray() on the de-duplicated tag list.
 */
function initializeLocaleList(locales) {
  var uniqueTags = [];

  // Constructor is called without arguments.
  if (locales === undefined) {
    return freezeArray(uniqueTags);
  }

  // We allow single string localeID.
  if (typeof locales === 'string') {
    uniqueTags.push(canonicalizeLanguageTag(locales));
    return freezeArray(uniqueTags);
  }

  var source = toObject(locales);
  // Converts it to UInt32 (>>> is shr on 32bit integers).
  var count = source.length >>> 0;

  var index = 0;
  while (index < count) {
    // Holes in the array-like are skipped rather than canonicalized.
    if (index in source) {
      var canonical = canonicalizeLanguageTag(source[index]);
      if (uniqueTags.indexOf(canonical) < 0) {
        uniqueTags.push(canonical);
      }
    }
    index++;
  }

  return freezeArray(uniqueTags);
}
| 99 |
| 100 |
/**
 * Validates the language tag. Section 2.2.9 of the bcp47 spec
 * defines a valid tag.
 *
 * ICU is too permissive and lets invalid tags, like
 * hant-cmn-cn, through.
 *
 * Language tags are case-insensitive, so the private use marker and the
 * duplicate variant/singleton checks are evaluated on a lower-cased copy of
 * the tag: 'sl-rozaj-ROZAJ' is rejected as a duplicate variant, while
 * 'en-X-abcde-abcde' is accepted because everything after '-X-' is private.
 *
 * @param {string} locale Language tag to validate.
 * @return {boolean} false if the language tag is invalid, true otherwise.
 */
function isValidLanguageTag(locale) {
  // Check if it's well-formed, including grandfathered tags.
  if (LANGUAGE_TAG_RE.test(locale) === false) {
    return false;
  }

  // LANGUAGE_TAG_RE matches case-insensitively, so normalize the case once
  // and make every later comparison case-insensitive as well.
  var normalized = locale.toLowerCase();

  // Just return if it's a x- form. It's all private.
  if (normalized.indexOf('x-') === 0) {
    return true;
  }

  // Check if there are any duplicate variants or singletons (extensions).

  // Remove private use section.
  normalized = normalized.split('-x-')[0];

  // Skip language since it can match variant regex, so we start from 1.
  // We are matching i-klingon here, but that's ok, since i-klingon-klingon
  // is not valid and would fail LANGUAGE_TAG_RE test.
  var variants = [];
  var extensions = [];
  var parts = normalized.split('-');
  for (var i = 1; i < parts.length; i++) {
    var value = parts[i];

    // Once the first extension singleton has been seen, the remaining
    // subtags belong to extensions and are no longer treated as variants.
    if (LANGUAGE_VARIANT_RE.test(value) === true && extensions.length === 0) {
      if (variants.indexOf(value) !== -1) {
        return false;
      }
      variants.push(value);
    }

    if (LANGUAGE_SINGLETON_RE.test(value) === true) {
      if (extensions.indexOf(value) !== -1) {
        return false;
      }
      extensions.push(value);
    }
  }

  return true;
}
| 153 |
| 154 |
/**
 * Builds the regular expressions that validate a language tag
 * against the bcp47 spec.
 * Uses http://tools.ietf.org/html/bcp47, section 2.1, ABNF.
 * Runs on load and initializes the global REs LANGUAGE_SINGLETON_RE,
 * LANGUAGE_VARIANT_RE and LANGUAGE_TAG_RE (all case-insensitive).
 */
(function() {
  // Character classes.
  var letter = '[a-zA-Z]';
  var number = '[0-9]';
  var letterOrNumber = '(' + letter + '|' + number + ')';

  // Grandfathered tags are matched literally.
  var regularTags = '(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|' +
      'zh-min|zh-min-nan|zh-xiang)';
  var irregularTags = '(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|' +
      'i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|' +
      'i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)';
  var grandfatheredTags = '(' + irregularTags + '|' + regularTags + ')';

  var privateUsePart = '(x(-' + letterOrNumber + '{1,8})+)';

  // Extension singleton: a digit or any letter other than x.
  var singletonPart = '(' + number + '|[A-WY-Za-wy-z])';
  var extensionPart = '(' + singletonPart + '(-' + letterOrNumber + '{2,8})+)';

  // Variant: 5-8 alphanumerics, or a digit followed by 3 alphanumerics.
  var variantPart = '(' + letterOrNumber + '{5,8}|(' + number +
      letterOrNumber + '{3}))';

  var regionPart = '(' + letter + '{2}|' + number + '{3})';
  var scriptPart = '(' + letter + '{4})';
  var extLangPart = '(' + letter + '{3}(-' + letter + '{3}){0,2})';
  var languagePart = '(' + letter + '{2,3}(-' + extLangPart + ')?|' +
      letter + '{4}|' + letter + '{5,8})';

  var langTagPart = languagePart + '(-' + scriptPart + ')?(-' + regionPart +
      ')?(-' + variantPart + ')*(-' + extensionPart + ')*(-' +
      privateUsePart + ')?';

  // A complete tag is a langtag, a private use tag or a grandfathered tag.
  var fullTagPattern =
      '^(' + langTagPart + '|' + privateUsePart + '|' + grandfatheredTags +
      ')$';

  LANGUAGE_SINGLETON_RE = new RegExp('^' + singletonPart + '$', 'i');
  LANGUAGE_VARIANT_RE = new RegExp('^' + variantPart + '$', 'i');
  LANGUAGE_TAG_RE = new RegExp(fullTagPattern, 'i');
})();
OLD | NEW |