OLD | NEW |
| (Empty) |
1 // Copyright 2013 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 // limitations under the License. | |
28 | |
29 // ECMAScript 402 API implementation is broken into separate files for | |
30 // each service. The build system combines them together into one | |
31 // Intl namespace. | |
32 | |
33 /** | |
34 * Canonicalizes the language tag, or throws in case the tag is invalid. | |
35 */ | |
36 function canonicalizeLanguageTag(localeID) { | |
37 // null is typeof 'object' so we have to do extra check. | |
38 if (typeof localeID !== 'string' && typeof localeID !== 'object' || | |
39 localeID === null) { | |
40 throw new TypeError('Language ID should be string or object.'); | |
41 } | |
42 | |
43 var localeString = String(localeID); | |
44 | |
45 if (isValidLanguageTag(localeString) === false) { | |
46 throw new RangeError('Invalid language tag: ' + localeString); | |
47 } | |
48 | |
49 // This call will strip -kn but not -kn-true extensions. | |
50 // ICU bug filled - http://bugs.icu-project.org/trac/ticket/9265. | |
51 // TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after | |
52 // upgrade to ICU 4.9. | |
53 var tag = %CanonicalizeLanguageTag(localeString); | |
54 if (tag === 'invalid-tag') { | |
55 throw new RangeError('Invalid language tag: ' + localeString); | |
56 } | |
57 | |
58 return tag; | |
59 } | |
60 | |
61 | |
/**
 * Builds the list of requested locales: every entry is canonicalized with
 * canonicalizeLanguageTag and duplicates are dropped, keeping the first
 * occurrence. The list is passed through freezeArray before being returned.
 *
 * @param {undefined|string|Object} locales Nothing, a single locale string,
 *     or an array-like collection of locale identifiers.
 * @return {Array} The result of freezeArray on the de-duplicated tags; the
 *     list is empty when locales is undefined.
 * @throws Whatever canonicalizeLanguageTag throws for an entry that is not a
 *     well formed BCP47 tag.
 */
function initializeLocaleList(locales) {
  var unique = [];

  // Constructor is called without arguments.
  if (locales === undefined) {
    return freezeArray(unique);
  }

  // A single string localeID is accepted as a one-element list.
  if (typeof locales === 'string') {
    unique.push(canonicalizeLanguageTag(locales));
    return freezeArray(unique);
  }

  var source = toObject(locales);
  // Converts the length to UInt32 (>>> is shr on 32bit integers).
  var count = source.length >>> 0;

  for (var index = 0; index < count; index++) {
    // Skip holes in sparse array-likes.
    if (!(index in source)) {
      continue;
    }

    var canonical = canonicalizeLanguageTag(source[index]);
    if (unique.indexOf(canonical) === -1) {
      unique.push(canonical);
    }
  }

  return freezeArray(unique);
}
97 | |
98 | |
/**
 * Validates the language tag. Section 2.2.9 of the bcp47 spec
 * defines a valid tag.
 *
 * ICU is too permissive and lets invalid tags, like
 * hant-cmn-cn, through.
 *
 * Subtags are compared case-insensitively: duplicates that differ only in
 * case (for example en-u-co-phonebk-U-kn-true) are rejected, and an
 * upper-case private use marker (X-... or ...-X-...) is recognized.
 *
 * @param {string} locale Language tag to check.
 * @return {boolean} True if the tag matches LANGUAGE_TAG_RE and contains no
 *     duplicate variant or singleton subtags; false otherwise.
 */
function isValidLanguageTag(locale) {
  // Check if it's well-formed, including grandfathered tags.
  if (LANGUAGE_TAG_RE.test(locale) === false) {
    return false;
  }

  // The string operations below are case-sensitive while language tags are
  // not, so every later check works on a lower-cased copy.
  var tag = locale.toLowerCase();

  // Just return if it's a x- form. It's all private.
  if (tag.indexOf('x-') === 0) {
    return true;
  }

  // Check if there are any duplicate variants or singletons (extensions).

  // Remove private use section.
  var publicPart = tag.split(/-x-/)[0];

  // Skip language since it can match variant regex, so we start from 1.
  // We are matching i-klingon here, but that's ok, since i-klingon-klingon
  // is not valid and would fail LANGUAGE_TAG_RE test.
  var variants = [];
  var extensions = [];
  var parts = publicPart.split(/-/);
  for (var i = 1; i < parts.length; i++) {
    var value = parts[i];

    // Variants only occur before the first extension singleton; later
    // subtags of the same shape belong to an extension and may repeat.
    if (LANGUAGE_VARIANT_RE.test(value) === true && extensions.length === 0) {
      if (variants.indexOf(value) !== -1) {
        return false;
      }
      variants.push(value);
    }

    if (LANGUAGE_SINGLETON_RE.test(value) === true) {
      if (extensions.indexOf(value) !== -1) {
        return false;
      }
      extensions.push(value);
    }
  }

  return true;
}
151 | |
152 | |
/**
 * Builds the regular expressions that validate a language tag
 * against the bcp47 spec.
 * Uses http://tools.ietf.org/html/bcp47, section 2.1, ABNF.
 * Runs on load and initializes the global REs LANGUAGE_SINGLETON_RE,
 * LANGUAGE_VARIANT_RE and LANGUAGE_TAG_RE, in that order.
 */
(function() {
  // Wraps a pattern fragment in a capturing group.
  function group(body) {
    return '(' + body + ')';
  }

  // Compiles a fragment into a case-insensitive RegExp that has to match
  // the whole input string.
  function wholeString(body) {
    return new RegExp('^' + body + '$', 'i');
  }

  var ALPHA = '[a-zA-Z]';
  var DIGIT = '[0-9]';
  var ALPHANUM = group(ALPHA + '|' + DIGIT);

  var irregularTags = group([
    'en-GB-oed', 'i-ami', 'i-bnn', 'i-default', 'i-enochian', 'i-hak',
    'i-klingon', 'i-lux', 'i-mingo', 'i-navajo', 'i-pwn', 'i-tao', 'i-tay',
    'i-tsu', 'sgn-BE-FR', 'sgn-BE-NL', 'sgn-CH-DE'
  ].join('|'));
  var regularTags = group([
    'art-lojban', 'cel-gaulish', 'no-bok', 'no-nyn', 'zh-guoyu', 'zh-hakka',
    'zh-min', 'zh-min-nan', 'zh-xiang'
  ].join('|'));
  var grandfatheredTags = group(irregularTags + '|' + regularTags);

  var privateUseTag = group('x(-' + ALPHANUM + '{1,8})+');

  // Any single digit or letter except x, which introduces private use.
  var singletonSubtag = group(DIGIT + '|[A-WY-Za-wy-z]');
  LANGUAGE_SINGLETON_RE = wholeString(singletonSubtag);

  var extensionSubtags = group(singletonSubtag + '(-' + ALPHANUM + '{2,8})+');

  var variantSubtag =
      group(ALPHANUM + '{5,8}|' + group(DIGIT + ALPHANUM + '{3}'));
  LANGUAGE_VARIANT_RE = wholeString(variantSubtag);

  var regionSubtag = group(ALPHA + '{2}|' + DIGIT + '{3}');
  var scriptSubtag = group(ALPHA + '{4}');
  var extLangSubtags = group(ALPHA + '{3}(-' + ALPHA + '{3}){0,2}');
  var languageSubtag = group(
      ALPHA + '{2,3}(-' + extLangSubtags + ')?|' +
      ALPHA + '{4}|' +
      ALPHA + '{5,8}');

  // language [-script] [-region] *(-variant) *(-extension) [-privateuse]
  var langTag = [
    languageSubtag,
    '(-' + scriptSubtag + ')?',
    '(-' + regionSubtag + ')?',
    '(-' + variantSubtag + ')*',
    '(-' + extensionSubtags + ')*',
    '(-' + privateUseTag + ')?'
  ].join('');

  LANGUAGE_TAG_RE = wholeString(
      group(langTag + '|' + privateUseTag + '|' + grandfatheredTags));
})();
OLD | NEW |