Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: src/js/i18n.js

Issue 1812673005: Use ICU case conversion/transliterator for case conversion behind a flag (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: use FlatContent for uppercase; add 3 templatized helpers Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // ECMAScript 402 API implementation. 5 // ECMAScript 402 API implementation.
6 6
7 /** 7 /**
8 * Intl object is a single object that has some named properties, 8 * Intl object is a single object that has some named properties,
9 * all of which are constructors. 9 * all of which are constructors.
10 */ 10 */
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
137 'numberformat': UNDEFINED, 137 'numberformat': UNDEFINED,
138 'dateformat': UNDEFINED, 138 'dateformat': UNDEFINED,
139 'breakiterator': UNDEFINED 139 'breakiterator': UNDEFINED
140 }; 140 };
141 141
142 /** 142 /**
143 * Caches default ICU locale. 143 * Caches default ICU locale.
144 */ 144 */
145 var DEFAULT_ICU_LOCALE = UNDEFINED; 145 var DEFAULT_ICU_LOCALE = UNDEFINED;
146 146
147 function GetDefaultICULocaleJS() {
148 if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) {
149 DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
150 }
151 return DEFAULT_ICU_LOCALE;
152 }
153
147 /** 154 /**
148 * Unicode extension regular expression. 155 * Unicode extension regular expression.
149 */ 156 */
150 var UNICODE_EXTENSION_RE = UNDEFINED; 157 var UNICODE_EXTENSION_RE = UNDEFINED;
151 158
152 function GetUnicodeExtensionRE() { 159 function GetUnicodeExtensionRE() {
153 if (IS_UNDEFINED(UNDEFINED)) { 160 if (IS_UNDEFINED(UNDEFINED)) {
154 UNICODE_EXTENSION_RE = new GlobalRegExp('-u(-[a-z0-9]{2,8})+', 'g'); 161 UNICODE_EXTENSION_RE = new GlobalRegExp('-u(-[a-z0-9]{2,8})+', 'g');
155 } 162 }
156 return UNICODE_EXTENSION_RE; 163 return UNICODE_EXTENSION_RE;
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 // Truncate locale if possible. 448 // Truncate locale if possible.
442 var pos = %_Call(StringLastIndexOf, locale, '-'); 449 var pos = %_Call(StringLastIndexOf, locale, '-');
443 if (pos === -1) { 450 if (pos === -1) {
444 break; 451 break;
445 } 452 }
446 locale = %_Call(StringSubstring, locale, 0, pos); 453 locale = %_Call(StringSubstring, locale, 0, pos);
447 } while (true); 454 } while (true);
448 } 455 }
449 456
450 // Didn't find a match, return default. 457 // Didn't find a match, return default.
451 if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) { 458 return {'locale': GetDefaultICULocaleJS(), 'extension': '', 'position': -1};
452 DEFAULT_ICU_LOCALE = %GetDefaultICULocale();
453 }
454
455 return {'locale': DEFAULT_ICU_LOCALE, 'extension': '', 'position': -1};
456 } 459 }
457 460
458 461
459 /** 462 /**
460 * Returns best matched supported locale and extension info using 463 * Returns best matched supported locale and extension info using
461 * implementation dependent algorithm. 464 * implementation dependent algorithm.
462 */ 465 */
463 function bestFitMatcher(service, requestedLocales) { 466 function bestFitMatcher(service, requestedLocales) {
464 // TODO(cira): implement better best fit algorithm. 467 // TODO(cira): implement better best fit algorithm.
465 return lookupMatcher(service, requestedLocales); 468 return lookupMatcher(service, requestedLocales);
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
715 } 718 }
716 } 719 }
717 return result; 720 return result;
718 } 721 }
719 722
720 /** 723 /**
721 * Canonicalizes the language tag, or throws in case the tag is invalid. 724 * Canonicalizes the language tag, or throws in case the tag is invalid.
722 */ 725 */
723 function canonicalizeLanguageTag(localeID) { 726 function canonicalizeLanguageTag(localeID) {
724 // null is typeof 'object' so we have to do extra check. 727 // null is typeof 'object' so we have to do extra check.
725 if (typeof localeID !== 'string' && typeof localeID !== 'object' || 728 if ((typeof localeID !== 'string' && typeof localeID !== 'object') ||
Dan Ehrenberg 2016/04/20 22:01:29 Nit: As long as you're changing this code, could y
jungshik at Google 2016/04/21 20:39:17 Done.
726 IS_NULL(localeID)) { 729 IS_NULL(localeID)) {
727 throw MakeTypeError(kLanguageID); 730 throw MakeTypeError(kLanguageID);
728 } 731 }
729 732
733 // Optimize for the most common case; a language code alone in
734 // the canonical form/lowercase (e.g. "en", "fil").
735 if (typeof localeID === 'string' &&
Dan Ehrenberg 2016/04/20 22:01:29 IS_STRING
jungshik at Google 2016/04/21 20:39:16 Done.
736 !IS_NULL(InternalRegExpMatch(/^[a-z]{2,3}$/, localeID)))
737 return localeID;
738
730 var localeString = GlobalString(localeID); 739 var localeString = GlobalString(localeID);
731 740
732 if (isValidLanguageTag(localeString) === false) { 741 if (isValidLanguageTag(localeString) === false) {
733 throw MakeRangeError(kInvalidLanguageTag, localeString); 742 throw MakeRangeError(kInvalidLanguageTag, localeString);
734 } 743 }
735 744
736 // This call will strip -kn but not -kn-true extensions.
737 // ICU bug filed - http://bugs.icu-project.org/trac/ticket/9265.
738 // TODO(cira): check if -u-kn-true-kc-true-kh-true still throws after
739 // upgrade to ICU 4.9.
740 var tag = %CanonicalizeLanguageTag(localeString); 745 var tag = %CanonicalizeLanguageTag(localeString);
741 if (tag === 'invalid-tag') { 746 if (tag === 'invalid-tag') {
742 throw MakeRangeError(kInvalidLanguageTag, localeString); 747 throw MakeRangeError(kInvalidLanguageTag, localeString);
743 } 748 }
744 749
745 return tag; 750 return tag;
746 } 751 }
747 752
748 753
749 /** 754 /**
(...skipping 1235 matching lines...) Expand 10 before | Expand all | Expand 10 after
1985 var useOptions = (IS_UNDEFINED(defaults)) ? options : defaults; 1990 var useOptions = (IS_UNDEFINED(defaults)) ? options : defaults;
1986 if (IS_UNDEFINED(locales) && IS_UNDEFINED(options)) { 1991 if (IS_UNDEFINED(locales) && IS_UNDEFINED(options)) {
1987 if (IS_UNDEFINED(defaultObjects[service])) { 1992 if (IS_UNDEFINED(defaultObjects[service])) {
1988 defaultObjects[service] = new savedObjects[service](locales, useOptions); 1993 defaultObjects[service] = new savedObjects[service](locales, useOptions);
1989 } 1994 }
1990 return defaultObjects[service]; 1995 return defaultObjects[service];
1991 } 1996 }
1992 return new savedObjects[service](locales, useOptions); 1997 return new savedObjects[service](locales, useOptions);
1993 } 1998 }
1994 1999
2000 function getCaseConversionLanguageId(locales) {
Dan Ehrenberg 2016/04/20 22:01:29 Generally, the v8 convention is CamelCase with an
jungshik at Google 2016/04/21 20:39:16 Done.
2001 var language;
2002 // Optimize for the most common two cases. initializeLocaleList() can handle
2003 // them as well, but it's rather slow accounting for over 60% of
2004 // toLocale{U,L}Case() and about 40% of toLocale{U,L}Case("<locale>").
2005 if (IS_UNDEFINED(locales)) {
2006 language = GetDefaultICULocaleJS();
2007 } else if (typeof locales === 'string') {
Dan Ehrenberg 2016/04/20 22:01:29 IS_STRING
jungshik at Google 2016/04/21 20:39:16 Done.
2008 language = canonicalizeLanguageTag(locales);
2009 } else {
2010 var locales = initializeLocaleList(locales);
2011 language = locales.length > 0 ? locales[0] : GetDefaultICULocaleJS();
2012 }
2013
2014 // StringSplit is slwoer than this.
Dan Ehrenberg 2016/04/20 22:01:29 slower Consider factoring this out into a utility
jungshik at Google 2016/04/21 20:39:16 Typo fixed.
2015 var pos = %_Call(StringIndexOf, language, '-');
2016 if (pos != -1)
2017 language = %_Call(StringSubstring, language, 0, pos);
2018
2019 var CUSTOM_CASE_LANGUAGES = ['az', 'el', 'lt', 'tr'];
Dan Ehrenberg 2016/04/20 22:01:29 Could we somehow query this from ICU? I thought th
jungshik at Google 2016/04/21 20:39:17 Some typesetting systems in the past couldn't deal
2020 return %_Call(ArrayIndexOf, CUSTOM_CASE_LANGUAGES, language);
2021 }
2022
2023 function localeConvertCase(s, locales, isToUpper) {
Dan Ehrenberg 2016/04/20 22:01:29 LocaleConvertCase
jungshik at Google 2016/04/21 20:39:16 Done.
2024 var caseConversionLanguageId = getCaseConversionLanguageId(locales);
2025 if (caseConversionLanguageId == -1)
2026 return isToUpper ? %StringToUpperCaseI18N(s) : %StringToLowerCaseI18N(s);
2027 return %StringLocaleConvertCase(s, isToUpper, caseConversionLanguageId);
2028 }
2029
1995 /** 2030 /**
1996 * Compares this and that, and returns less than 0, 0 or greater than 0 value. 2031 * Compares this and that, and returns less than 0, 0 or greater than 0 value.
1997 * Overrides the built-in method. 2032 * Overrides the built-in method.
1998 */ 2033 */
1999 OverrideFunction(GlobalString.prototype, 'localeCompare', function(that) { 2034 OverrideFunction(GlobalString.prototype, 'localeCompare', function(that) {
2000 if (!IS_UNDEFINED(new.target)) { 2035 if (!IS_UNDEFINED(new.target)) {
2001 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor); 2036 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2002 } 2037 }
2003 2038
2004 if (IS_NULL_OR_UNDEFINED(this)) { 2039 if (IS_NULL_OR_UNDEFINED(this)) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
2037 var normalizationForm = %_Call(ArrayIndexOf, NORMALIZATION_FORMS, form); 2072 var normalizationForm = %_Call(ArrayIndexOf, NORMALIZATION_FORMS, form);
2038 if (normalizationForm === -1) { 2073 if (normalizationForm === -1) {
2039 throw MakeRangeError(kNormalizationForm, 2074 throw MakeRangeError(kNormalizationForm,
2040 %_Call(ArrayJoin, NORMALIZATION_FORMS, ', ')); 2075 %_Call(ArrayJoin, NORMALIZATION_FORMS, ', '));
2041 } 2076 }
2042 2077
2043 return %StringNormalize(s, normalizationForm); 2078 return %StringNormalize(s, normalizationForm);
2044 } 2079 }
2045 ); 2080 );
2046 2081
2082 OverrideFunction(GlobalString.prototype, 'toLowerCase', function() {
2083 if (!IS_UNDEFINED(new.target)) {
2084 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2085 }
2086 CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
2087 var s = TO_STRING(this);
2088 return %StringToLowerCaseI18N(s);
Dan Ehrenberg 2016/04/20 22:01:29 ECMA262 seems to specify using the root locale in
jungshik at Google 2016/04/21 20:39:16 Right.
2089 }
2090 );
2091
2092 OverrideFunction(GlobalString.prototype, 'toUpperCase', function() {
2093 if (!IS_UNDEFINED(new.target)) {
2094 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2095 }
2096 CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLowerCase");
2097 var s = TO_STRING(this);
2098 return %StringToUpperCaseI18N(s);
2099 }
2100 );
2101
2102 OverrideFunction(GlobalString.prototype, 'toLocaleLowerCase', function() {
2103 if (!IS_UNDEFINED(new.target)) {
2104 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2105 }
2106 CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleLowerCase");
2107 return localeConvertCase(TO_STRING(this), arguments[0], false);
2108 }
2109 );
2110
2111
2112 OverrideFunction(GlobalString.prototype, 'toLocaleUpperCase', function() {
2113 if (!IS_UNDEFINED(new.target)) {
2114 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2115 }
2116 CHECK_OBJECT_COERCIBLE(this, "String.prototype.toLocaleUpperCase");
2117 return localeConvertCase(TO_STRING(this), arguments[0], true);
2118 }
2119 );
2120
2047 2121
2048 /** 2122 /**
2049 * Formats a Number object (this) using locale and options values. 2123 * Formats a Number object (this) using locale and options values.
2050 * If locale or options are omitted, defaults are used. 2124 * If locale or options are omitted, defaults are used.
2051 */ 2125 */
2052 OverrideFunction(GlobalNumber.prototype, 'toLocaleString', function() { 2126 OverrideFunction(GlobalNumber.prototype, 'toLocaleString', function() {
2053 if (!IS_UNDEFINED(new.target)) { 2127 if (!IS_UNDEFINED(new.target)) {
2054 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor); 2128 throw MakeTypeError(kOrdinaryFunctionCalledAsConstructor);
2055 } 2129 }
2056 2130
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
2132 } 2206 }
2133 2207
2134 var locales = arguments[0]; 2208 var locales = arguments[0];
2135 var options = arguments[1]; 2209 var options = arguments[1];
2136 return toLocaleDateTime( 2210 return toLocaleDateTime(
2137 this, locales, options, 'time', 'time', 'dateformattime'); 2211 this, locales, options, 'time', 'time', 'dateformattime');
2138 } 2212 }
2139 ); 2213 );
2140 2214
2141 }) 2215 })
OLDNEW
« no previous file with comments | « src/flag-definitions.h ('k') | src/runtime/runtime.h » ('j') | src/runtime/runtime-i18n.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698