OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 2002-2014, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ******************************************************************** | |
6 * | |
7 * @author Mark E. Davis | |
8 * @author Vladimir Weinstein | |
9 */ | |
10 | |
11 #include "unicode/utypes.h" | |
12 | |
13 #if !UCONFIG_NO_NORMALIZATION | |
14 | |
15 #include "intltest.h" | |
16 #include "cstring.h" | |
17 #include "canittst.h" | |
18 #include "unicode/caniter.h" | |
19 #include "unicode/normlzr.h" | |
20 #include "unicode/uchar.h" | |
21 #include "hash.h" | |
22 | |
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 *
 * The macro argument is parenthesized wherever it is used, so an expression
 * argument keeps the grouping the caller wrote instead of being re-parsed
 * together with the unary `*` the previous definition applied to it.
 */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
24 | |
25 #define CASE(id,test) case id: \ | |
26 name = #test; \ | |
27 if (exec) { \ | |
28 logln(#test "---"); \ | |
29 logln((UnicodeString)""); \ | |
30 test(); \ | |
31 } \ | |
32 break | |
33 | |
34 void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec, | |
35 const char* &name, char* /*par*/) { | |
36 switch (index) { | |
37 CASE(0, TestBasic); | |
38 CASE(1, TestExhaustive); | |
39 CASE(2, TestAPI); | |
40 default: name = ""; break; | |
41 } | |
42 } | |
43 | |
/*
 * Disabled helper, kept for reference only (everything up to the closing
 * comment marker is commented out and is not compiled).
 *
 * Converts Java-style strings with \u Unicode escapes into UnicodeString objects.
 *
static UnicodeString str(const char *input)
{
    UnicodeString str(input, ""); // Invariant conversion
    return str.unescape();
}
*/
52 | |
53 | |
54 CanonicalIteratorTest::CanonicalIteratorTest() : | |
55 nameTrans(NULL), hexTrans(NULL) | |
56 { | |
57 } | |
58 | |
59 CanonicalIteratorTest::~CanonicalIteratorTest() | |
60 { | |
61 #if !UCONFIG_NO_TRANSLITERATION | |
62 if(nameTrans != NULL) { | |
63 delete(nameTrans); | |
64 } | |
65 if(hexTrans != NULL) { | |
66 delete(hexTrans); | |
67 } | |
68 #endif | |
69 } | |
70 | |
71 void CanonicalIteratorTest::TestExhaustive() { | |
72 UErrorCode status = U_ZERO_ERROR; | |
73 CanonicalIterator it("", status); | |
74 if (U_FAILURE(status)) { | |
75 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
76 return; | |
77 } | |
78 UChar32 i = 0; | |
79 UnicodeString s; | |
80 // Test static and dynamic class IDs | |
81 if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
82 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDyna
micClassID"); | |
83 } | |
84 for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { | |
85 //for (i = 0xae00; i < 0xaf00; ++i) { | |
86 | |
87 if ((i % 0x100) == 0) { | |
88 logln("Testing U+%06X", i); | |
89 } | |
90 | |
91 // skip characters we know don't have decomps | |
92 int8_t type = u_charType(i); | |
93 if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR | |
94 || type == U_SURROGATE) continue; | |
95 | |
96 s = i; | |
97 characterTest(s, i, it); | |
98 | |
99 s += (UChar32)0x0345; //"\\u0345"; | |
100 characterTest(s, i, it); | |
101 } | |
102 } | |
103 | |
104 void CanonicalIteratorTest::TestBasic() { | |
105 | |
106 UErrorCode status = U_ZERO_ERROR; | |
107 | |
108 static const char * const testArray[][2] = { | |
109 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u
0307, A\\u030A\\u1E0B\\u0327, " | |
110 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0
307, " | |
111 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u032
7, " | |
112 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u030
7"}, | |
113 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C,
\\u010D\\u017E"}, | |
114 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, | |
115 }; | |
116 | |
117 #if 0 | |
118 // This is not interesting for C/C++ as the data is already built beforehand | |
119 // check build | |
120 UnicodeSet ss = CanonicalIterator.getSafeStart(); | |
121 logln("Safe Start: " + ss.toPattern(true)); | |
122 ss = CanonicalIterator.getStarts('a'); | |
123 expectEqual("Characters with 'a' at the start of their decomposition: ", "",
CanonicalIterator.getStarts('a'), | |
124 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB
" | |
125 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1
EAF\u1EB1\u1EB3\u1EB5\u1EB7]") | |
126 ); | |
127 #endif | |
128 | |
129 // check permute | |
130 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed
to be sorted! | |
131 | |
132 Hashtable *permutations = new Hashtable(FALSE, status); | |
133 permutations->setValueDeleter(uprv_deleteUObject); | |
134 UnicodeString toPermute("ABC"); | |
135 | |
136 CanonicalIterator::permute(toPermute, FALSE, permutations, status); | |
137 | |
138 logln("testing permutation"); | |
139 | |
140 expectEqual("Simple permutation ", "", collectionToString(permutations), "AB
C, ACB, BAC, BCA, CAB, CBA"); | |
141 | |
142 delete permutations; | |
143 | |
144 // try samples | |
145 logln("testing samples"); | |
146 Hashtable *set = new Hashtable(FALSE, status); | |
147 set->setValueDeleter(uprv_deleteUObject); | |
148 int32_t i = 0; | |
149 CanonicalIterator it("", status); | |
150 if(U_SUCCESS(status)) { | |
151 for (i = 0; i < ARRAY_LENGTH(testArray); ++i) { | |
152 //logln("Results for: " + name.transliterate(testArray[i])); | |
153 UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); | |
154 it.setSource(testStr, status); | |
155 set->removeAll(); | |
156 for (;;) { | |
157 //UnicodeString *result = new UnicodeString(it.next()); | |
158 UnicodeString result(it.next()); | |
159 if (result.isBogus()) { | |
160 break; | |
161 } | |
162 set->put(result, new UnicodeString(result), status); // Add result
to the table | |
163 //logln(++counter + ": " + hex.transliterate(result)); | |
164 //logln(" = " + name.transliterate(result)); | |
165 } | |
166 expectEqual(i + UnicodeString(": "), testStr, collectionToString(set),
CharsToUnicodeString(testArray[i][1])); | |
167 | |
168 } | |
169 } else { | |
170 dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorNam
e(status)); | |
171 } | |
172 delete set; | |
173 } | |
174 | |
175 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, Canonica
lIterator &it) | |
176 { | |
177 UErrorCode status = U_ZERO_ERROR; | |
178 UnicodeString decomp, comp; | |
179 UBool gotDecomp = FALSE; | |
180 UBool gotComp = FALSE; | |
181 UBool gotSource = FALSE; | |
182 | |
183 Normalizer::decompose(s, FALSE, 0, decomp, status); | |
184 Normalizer::compose(s, FALSE, 0, comp, status); | |
185 | |
186 // skip characters that don't have either decomp. | |
187 // need quick test for this! | |
188 if (s == decomp && s == comp) { | |
189 return; | |
190 } | |
191 | |
192 it.setSource(s, status); | |
193 | |
194 for (;;) { | |
195 UnicodeString item = it.next(); | |
196 if (item.isBogus()) break; | |
197 if (item == s) gotSource = TRUE; | |
198 if (item == decomp) gotDecomp = TRUE; | |
199 if (item == comp) gotComp = TRUE; | |
200 } | |
201 | |
202 if (!gotSource || !gotDecomp || !gotComp) { | |
203 errln("FAIL CanonicalIterator: " + s + (int)ch); | |
204 } | |
205 } | |
206 | |
207 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const Unic
odeString &item, const UnicodeString &a, const UnicodeString &b) { | |
208 if (!(a==b)) { | |
209 errln("FAIL: " + message + getReadable(item)); | |
210 errln("\t" + getReadable(a)); | |
211 errln("\t" + getReadable(b)); | |
212 } else { | |
213 logln("Checked: " + message + getReadable(item)); | |
214 logln("\t" + getReadable(a)); | |
215 logln("\t" + getReadable(b)); | |
216 } | |
217 } | |
218 | |
219 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { | |
220 UErrorCode status = U_ZERO_ERROR; | |
221 UnicodeString result = "["; | |
222 if (s.length() == 0) return ""; | |
223 // set up for readable display | |
224 #if !UCONFIG_NO_TRANSLITERATION | |
225 if(verbose) { | |
226 if (nameTrans == NULL) | |
227 nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRA
NS_FORWARD, status); | |
228 UnicodeString sName = s; | |
229 nameTrans->transliterate(sName); | |
230 result += sName; | |
231 result += ";"; | |
232 } | |
233 if (hexTrans == NULL) | |
234 hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_F
ORWARD, status); | |
235 #endif | |
236 UnicodeString sHex = s; | |
237 #if !UCONFIG_NO_TRANSLITERATION | |
238 if(hexTrans) { // maybe there is no data and transliterator cannot be instan
tiated | |
239 hexTrans->transliterate(sHex); | |
240 } | |
241 #endif | |
242 result += sHex; | |
243 result += "]"; | |
244 return result; | |
245 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transli
terate(s) + "]"; | |
246 } | |
247 | |
248 U_CFUNC int U_CALLCONV | |
249 compareUnicodeStrings(const void *s1, const void *s2) { | |
250 UnicodeString **st1 = (UnicodeString **)s1; | |
251 UnicodeString **st2 = (UnicodeString **)s2; | |
252 | |
253 return (*st1)->compare(**st2); | |
254 } | |
255 | |
256 | |
257 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { | |
258 UnicodeString result; | |
259 | |
260 // Iterate over the Hashtable, then qsort. | |
261 | |
262 UnicodeString **resArray = new UnicodeString*[col->count()]; | |
263 int32_t i = 0; | |
264 | |
265 const UHashElement *ne = NULL; | |
266 int32_t el = UHASH_FIRST; | |
267 //Iterator it = basic.iterator(); | |
268 ne = col->nextElement(el); | |
269 //while (it.hasNext()) | |
270 while (ne != NULL) { | |
271 //String item = (String) it.next(); | |
272 UnicodeString *item = (UnicodeString *)(ne->value.pointer); | |
273 resArray[i++] = item; | |
274 ne = col->nextElement(el); | |
275 } | |
276 | |
277 for(i = 0; i<col->count(); ++i) { | |
278 logln(*resArray[i]); | |
279 } | |
280 | |
281 qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings
); | |
282 | |
283 result = *resArray[0]; | |
284 | |
285 for(i = 1; i<col->count(); ++i) { | |
286 result += ", "; | |
287 result += *resArray[i]; | |
288 } | |
289 | |
290 /* | |
291 Iterator it = col.iterator(); | |
292 while (it.hasNext()) { | |
293 if (result.length() != 0) result.append(", "); | |
294 result.append(it.next().toString()); | |
295 } | |
296 */ | |
297 | |
298 delete [] resArray; | |
299 | |
300 return result; | |
301 } | |
302 | |
303 void CanonicalIteratorTest::TestAPI() { | |
304 UErrorCode status = U_ZERO_ERROR; | |
305 // Test reset and getSource | |
306 UnicodeString start("ljubav"); | |
307 logln("Testing CanonicalIterator::getSource"); | |
308 logln("Instantiating canonical iterator with string "+start); | |
309 CanonicalIterator can(start, status); | |
310 if (U_FAILURE(status)) { | |
311 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
312 return; | |
313 } | |
314 UnicodeString source = can.getSource(); | |
315 logln("CanonicalIterator::getSource returned "+source); | |
316 if(start != source) { | |
317 errln("CanonicalIterator.getSource() didn't return the starting string. Expe
cted "+start+", got "+source); | |
318 } | |
319 logln("Testing CanonicalIterator::reset"); | |
320 UnicodeString next = can.next(); | |
321 logln("CanonicalIterator::next returned "+next); | |
322 | |
323 can.reset(); | |
324 | |
325 UnicodeString afterReset = can.next(); | |
326 logln("After reset, CanonicalIterator::next returned "+afterReset); | |
327 | |
328 if(next != afterReset) { | |
329 errln("Next after instantiation ("+next+") is different from next after rese
t ("+afterReset+")."); | |
330 } | |
331 | |
332 logln("Testing getStaticClassID and getDynamicClassID"); | |
333 if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
334 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticCla
ssID"); | |
335 } | |
336 } | |
337 | |
338 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
OLD | NEW |