OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2013 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 // Tests the new String.prototype.normalize method. | |
29 | |
30 | |
31 // Common use case: searching for a 'not very exact' match. | |
32 // These are examples of data one might encounter in real use. | |
Nebojša Ćirić
2013/10/24 21:36:14
End full sentences with .
mnita
2013/10/25 15:22:10
Done.
| |
33 var testRealUseCases = function() { | |
34 // Vietnamese legacy text: old Windows 9x / non-Unicode applications use | |
35 // the windows-1258 code page, which is neither precomposed nor decomposed. | |
36 assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'), | |
37 'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD')); // all precomposed | |
38 | |
39 // Various kinds of spaces | |
40 assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space | |
41 'Google\u00a0Maps'.normalize('NFKD')); // non-breaking space | |
42 assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space | |
43 'Google\u2002Maps'.normalize('NFKD')); // en-space | |
44 assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space | |
45 'Google\u2003Maps'.normalize('NFKD')); // em-space | |
46 assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space | |
47 'Google\u3000Maps'.normalize('NFKC')); // ideographic space | |
48 | |
49 // Latin small ligature "fi" | |
50 assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD')); | |
51 | |
52 // ŀ, Latin small L with middle dot, used in Catalan and often represented | |
53 // as decomposed for non-Unicode environments (l + ·). | |
54 assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD')); | |
55 | |
56 // Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time) | |
57 assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'), // パソコン : wide | |
58 '\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD')); // パソコン : narrow | |
59 // Also for Japanese, Latin fullwidth forms vs. ASCII | |
60 assertEquals('ABCD'.normalize('NFKD'), | |
61 '\uff21\uff22\uff23\uff24'.normalize('NFKD')); // ABCD, fullwidth | |
62 }(); | |
63 | |
64 | |
65 var testEdgeCases = function() { | |
66 // Make sure we throw RangeError, as the standard requires. | |
Nebojša Ćirić
2013/10/24 21:36:14
.
mnita
2013/10/25 15:22:10
Done.
| |
67 assertThrows('"".normalize(1234)', RangeError); | |
68 assertThrows('"".normalize("BAD")', RangeError); | |
69 | |
70 // The standard does not say what kind of exceptions we should throw, so we | |
71 // will not be specific. But we still test that we throw errors. | |
72 assertThrows('s.normalize()'); // s is not defined | |
73 assertThrows('var s = null; s.normalize()'); | |
74 assertThrows('var s = undefined; s.normalize()'); | |
75 assertThrows('var s = 1234; s.normalize()'); // no normalize for non-strings | |
76 }(); | |
77 | |
78 | |
79 // Several kinds of mappings. No need to be comprehensive: we don't test | |
80 // the ICU functionality, we only test the C - JavaScript 'glue'. | |
81 var testData = [ | |
82 // org, NFC (also the default), NFD, NFKC, NFKD | |
83 ['\u00c7', // Ç : Combining sequence, Latin 1 | |
84 '\u00c7', '\u0043\u0327', | |
85 '\u00c7', '\u0043\u0327'], | |
86 ['\u0218', // Ș : Combining sequence, non-Latin 1 | |
87 '\u0218', '\u0053\u0326', | |
88 '\u0218', '\u0053\u0326'], | |
89 ['\uac00', // 가 : Hangul | |
90 '\uac00', '\u1100\u1161', | |
91 '\uac00', '\u1100\u1161'], | |
92 ['\uff76', // カ : Narrow Kana | |
93 '\uff76', '\uff76', | |
94 '\u30ab', '\u30ab'], | |
95 ['\u00bc', // ¼ : Fractions | |
96 '\u00bc', '\u00bc', | |
97 '\u0031\u2044\u0034', '\u0031\u2044\u0034'], | |
98 ['\u01c6', // dž : Latin ligature | |
99 '\u01c6', '\u01c6', | |
100 '\u0064\u017e', '\u0064\u007a\u030c'], | |
101 ['s\u0307\u0323', // s + dot above + dot below, ordering of combining marks | |
102 '\u1e69', 's\u0323\u0307', | |
103 '\u1e69', 's\u0323\u0307'], | |
104 ['\u3300', // ㌀ : Squared characters | |
105 '\u3300', '\u3300', | |
106 '\u30a2\u30d1\u30fc\u30c8', // アパート | |
107 '\u30a2\u30cf\u309a\u30fc\u30c8'], // アパート | |
108 ['\ufe37', // ︷ : Vertical forms | |
109 '\ufe37', '\ufe37', | |
110 '{' , '{'], | |
111 ['\u2079', // ⁹ : superscript 9 | |
112 '\u2079', '\u2079', | |
113 '9', '9'], | |
114 ['\ufee5\ufee6\ufee7\ufee8', // Arabic forms | |
115 '\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8', | |
116 '\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'], | |
117 ['\u2460', // ① : Circled | |
118 '\u2460', '\u2460', | |
119 '1', '1'], | |
120 ['\u210c', // ℌ : Font variants | |
121 '\u210c', '\u210c', | |
122 'H', 'H'], | |
123 ['\u2126', // Ω : Singleton, OHM sign vs. Greek capital letter OMEGA | |
124 '\u03a9', '\u03a9', | |
125 '\u03a9', '\u03a9'], | |
126 ['\ufdfb', // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU | |
127 '\ufdfb', '\ufdfb', | |
128 '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647', | |
129 '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647'] | |
130 ]; | |
131 | |
132 var testArray = function() { | |
133 var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4; | |
134 for (var i = 0; i < testData.length; ++i) { | |
135 // The original, NFC and NFD should all normalize to the same thing. | |
136 for (var column = 0; column < 3; ++column) { | |
137 var str = testData[i][column]; | |
138 assertEquals(str.normalize(), testData[i][kNFC]); // defaults to NFC | |
139 assertEquals(str.normalize('NFC'), testData[i][kNFC]); | |
140 assertEquals(str.normalize('NFD'), testData[i][kNFD]); | |
141 assertEquals(str.normalize('NFKC'), testData[i][kNFKC]); | |
142 assertEquals(str.normalize('NFKD'), testData[i][kNFKD]); | |
143 } | |
144 } | |
145 }(); | |
OLD | NEW |