test/intl/general/case-mapping.js - Issue 1812673005: Use ICU case conversion/transliterator for case conversion behind a flag - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(1)

My Issues | Starred Open | Closed | All

Side by Side Diff: test/intl/general/case-mapping.js

Issue 1812673005: Use ICU case conversion/transliterator for case conversion behind a flag (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Address Yang's comment. the feature is now harmony_in_progress instead of harmony_staged Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« src/js/i18n.js ('K') | « src/v8.gyp ('k') | test/intl/intl.status » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2016 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 // Flags: --icu_case_mapping
	Dan Ehrenberg 2016/05/10 19:29:37 This line is not used by the current intl test runner. Ideally, update test/intl/testcfg.py to forward flags the way mjsunit does (in which case you can make this a passing test expectation!) or remove this line. jungshik at Google 2016/05/10 23:22:33 On 2016/05/10 19:29:37, Dan Ehrenberg wrote: > This line is not used by the current intl test runner. Ideally, update > test/intl/testcfg.py to forward flags the way mjsunit does (in which case you > can make this a passing test expectation!) or remove this line. Changes intl/testcfg.py to support 'Flags: ...'. Thanks for the suggestion.
	6

	7 // Some edge cases that unibrow got wrong

	8

	9 assertEquals("𐐘", "𐑀".toUpperCase());

	10 assertEquals("𐑀", "𐐘".toLowerCase());

	11 assertEquals("σ", "Σ".toLowerCase());

	12

	13 // Some different paths in the ICU case conversion fastpath

	14

	15 assertEquals("σς", "\u03A3\u03A3".toLowerCase());

	16 // Expand sharp s in latin1 fastpath

	17 assertEquals("ASSB", "A\u00DFB".toUpperCase());

	18 assertEquals("AB", "Ab".toUpperCase());

	19 // Find first upper case in fastpath

	20 assertEquals("ab", "aB".toLowerCase());

	21 assertEquals("AÜ", "aü".toUpperCase());

	22 assertEquals("AÜ", "AÜ".toUpperCase());

	23 assertEquals("aü", "aü".toLowerCase());

	24 assertEquals("aü", "AÜ".toLowerCase());

	25 assertEquals("aü", "AÜ".toLowerCase());

	26

	27 // Starts with fastpath, but switches to full Unicode path

	28 // U+00FF is uppercased to U+0178.

	29 assertEquals("AŸ", "aÿ".toUpperCase());

	30 // U+00B5 (µ) is uppercased to U+039C (Μ)

	31 assertEquals("AΜ", "aµ".toUpperCase());

	32

	33 // Buffer size increase

	34 assertEquals("CSSBẶ", "cßbặ".toUpperCase());

	35 assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());

	36 // OneByte input with buffer size increase: non-fast path

	37 assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));

	38

	39 // More comprehensive tests for "tr", "az" and "lt" are in

	40 // test262/intl402/Strings/*

	41

	42 // Buffer size decrease with a single locale or locale list.

	43 // In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.

	44 assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));

	45 assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));

	46 assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));

	47

	48 // Cons string

	49 assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));

	50 assertEquals("abcijkl",

	51 ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));

	52 assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));

	53 assertEquals("abci\u0307jkl",

	54 ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));

	55 assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());

	56 assertEquals("abci\u0307jkl",

	57 ("aB" + "cI" + "\u0307j" + "kl").toLowerCase());

	58

	59 // "tr" and "az" should behave identically.

	60 assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),

	61 "aBcI\u0307".toLocaleLowerCase("az"));

	62 // What matters is the first locale in the locale list.

	63 assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),

	64 "aBcI\u0307".toLocaleLowerCase("tr"));

	65 assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),

	66 "aBcI\u0307".toLocaleLowerCase("en"));

	67 assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),

	68 "aBcI\u0307".toLowerCase());

	69

	70 // An empty locale list is the same as the default locale. Try these tests

	71 // under Turkish and Greek locale.

	72 assertEquals("aBcI\u0307".toLocaleLowerCase([]),

	73 "aBcI\u0307".toLocaleLowerCase());

	74 assertEquals("aBcI\u0307".toLocaleLowerCase([]),

	75 "aBcI\u0307".toLocaleLowerCase(Intl.GetDefaultLocale));

	76 assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());

	77 assertEquals("άόύώ".toLocaleUpperCase([]),

	78 "άόύώ".toLocaleUpperCase(Intl.GetDefaultLocale));

	79

	80

	81 // English/root locale keeps U+0307 (combining dot above).

	82 assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));

	83 assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));

	84 assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());

	85

	86 // Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and

	87 // other diacritic marks are dropped. This rule is based on the current CLDR's

	88 // el-Upper transformation, but Greek uppercasing rules are more sophisticated

	89 // than this. See http://bugs.icu-project.org/trac/ticket/10582 and

	90 // http://unicode.org/cldr/trac/ticket/7905 .

	91 assertEquals("Α", "α\u0301".toLocaleUpperCase("el"));

	92 assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR"));

	93 assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek"));

	94 assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR"));

	95 assertEquals("Α", "ά".toLocaleUpperCase("el"));

	96 assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));

	97 assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));

	98 assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));

	99 assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el"));

	100 assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el"));

	101

	102 // Input and output are identical.

	103 assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));

	104 assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));

	105 assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));

	106 assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));

	107 // ASCII-only or Latin-1 only: 1-byte

	108 assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));

	109 assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));

	110

	111 // To make sure that the input string is not overwritten in place.

	112 var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];

	113 for (var s of strings) {

	114 var backup = s;

	115 var uppered = s.toLocaleUpperCase("el");
	Yang 2016/05/10 20:37:36 unfortunately this is not the correct test. strings in js are immutable. backup would simply alias s. comparing s to the expected string literal also wouldn't work, since string literals are canonicalized and aliases of each other. You would need to construct the expectation somehow, for example using array.join. jungshik at Google 2016/05/10 23:33:24 On 2016/05/10 20:37:36, Yang wrote: > unfortunately this is not the correct test. strings in js are immutable. backup > would simply alias s. > > comparing s to the expected string literal also wouldn't work, since string > literals are canonicalized and aliases of each other. You would need to > construct the expectation somehow, for example using array.join. Thank you for enlightening me ! Now, I split |s| to an array (backup) and join it again to compare with |s|.
	116 assertEquals(s, backup);

	117 }

	118

	119 // In other locales, U+0301 is preserved.

	120 assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",

	121 "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));

	122 assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",

	123 "α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());

	124

	125 // Plane 1; Deseret and Warang Citi Script.

	126 assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());

	127 assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());

	128 // Mathematical Bold {Capital, Small} Letter A do not change.

	129 assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());

	130 assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());

	131 // Plane 1; New characters in Unicode 8.0

	132 assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());

	133 assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());

	134 assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());

	135 assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());

	136 assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));

	137 assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));

	138 assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());

OLD	NEW

« src/js/i18n.js ('K') | « src/v8.gyp ('k') | test/intl/intl.status » ('j') | no next file with comments »

Powered by Google App Engine

This is Rietveld 408576698