| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 2002-2014, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ******************************************************************** | |
| 6 * | |
| 7 * @author Mark E. Davis | |
| 8 * @author Vladimir Weinstein | |
| 9 */ | |
| 10 | |
| 11 #include "unicode/utypes.h" | |
| 12 | |
| 13 #if !UCONFIG_NO_NORMALIZATION | |
| 14 | |
| 15 #include "intltest.h" | |
| 16 #include "cstring.h" | |
| 17 #include "canittst.h" | |
| 18 #include "unicode/caniter.h" | |
| 19 #include "unicode/normlzr.h" | |
| 20 #include "unicode/uchar.h" | |
| 21 #include "hash.h" | |
| 22 | |
/**
 * Number of elements in a statically sized array, as an int32_t.
 * Only meaningful for true arrays; a pointer argument gives a wrong count.
 * The argument is parenthesized so that any array-valued expression is safe.
 */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
| 24 | |
/*
 * Expands to one `case` of a test-dispatch switch. It stores the test's name
 * in `name` and, when `exec` is set, logs a banner and an empty string and
 * then calls the test method. The caller supplies the trailing semicolon.
 */
#define CASE(id, test)                  \
    case id:                            \
        name = #test;                   \
        if (exec) {                     \
            logln(#test "---");         \
            logln(UnicodeString());     \
            test();                     \
        }                               \
        break
| 33 | |
| 34 void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec, | |
| 35 const char* &name, char* /*par*/) { | |
| 36 switch (index) { | |
| 37 CASE(0, TestBasic); | |
| 38 CASE(1, TestExhaustive); | |
| 39 CASE(2, TestAPI); | |
| 40 default: name = ""; break; | |
| 41 } | |
| 42 } | |
| 43 | |
/*
 * Unused helper, disabled by this comment block and kept for reference only.
 * Converts Java-style strings with \u Unicode escapes into UnicodeString objects.
 *
 * static UnicodeString str(const char *input)
 * {
 *     UnicodeString str(input, ""); // Invariant conversion
 *     return str.unescape();
 * }
 */
| 52 | |
| 53 | |
| 54 CanonicalIteratorTest::CanonicalIteratorTest() : | |
| 55 nameTrans(NULL), hexTrans(NULL) | |
| 56 { | |
| 57 } | |
| 58 | |
| 59 CanonicalIteratorTest::~CanonicalIteratorTest() | |
| 60 { | |
| 61 #if !UCONFIG_NO_TRANSLITERATION | |
| 62 if(nameTrans != NULL) { | |
| 63 delete(nameTrans); | |
| 64 } | |
| 65 if(hexTrans != NULL) { | |
| 66 delete(hexTrans); | |
| 67 } | |
| 68 #endif | |
| 69 } | |
| 70 | |
| 71 void CanonicalIteratorTest::TestExhaustive() { | |
| 72 UErrorCode status = U_ZERO_ERROR; | |
| 73 CanonicalIterator it("", status); | |
| 74 if (U_FAILURE(status)) { | |
| 75 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
| 76 return; | |
| 77 } | |
| 78 UChar32 i = 0; | |
| 79 UnicodeString s; | |
| 80 // Test static and dynamic class IDs | |
| 81 if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
| 82 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDyna
micClassID"); | |
| 83 } | |
| 84 for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { | |
| 85 //for (i = 0xae00; i < 0xaf00; ++i) { | |
| 86 | |
| 87 if ((i % 0x100) == 0) { | |
| 88 logln("Testing U+%06X", i); | |
| 89 } | |
| 90 | |
| 91 // skip characters we know don't have decomps | |
| 92 int8_t type = u_charType(i); | |
| 93 if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR | |
| 94 || type == U_SURROGATE) continue; | |
| 95 | |
| 96 s = i; | |
| 97 characterTest(s, i, it); | |
| 98 | |
| 99 s += (UChar32)0x0345; //"\\u0345"; | |
| 100 characterTest(s, i, it); | |
| 101 } | |
| 102 } | |
| 103 | |
| 104 void CanonicalIteratorTest::TestBasic() { | |
| 105 | |
| 106 UErrorCode status = U_ZERO_ERROR; | |
| 107 | |
| 108 static const char * const testArray[][2] = { | |
| 109 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u
0307, A\\u030A\\u1E0B\\u0327, " | |
| 110 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0
307, " | |
| 111 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u032
7, " | |
| 112 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u030
7"}, | |
| 113 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C,
\\u010D\\u017E"}, | |
| 114 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, | |
| 115 }; | |
| 116 | |
| 117 #if 0 | |
| 118 // This is not interesting for C/C++ as the data is already built beforehand | |
| 119 // check build | |
| 120 UnicodeSet ss = CanonicalIterator.getSafeStart(); | |
| 121 logln("Safe Start: " + ss.toPattern(true)); | |
| 122 ss = CanonicalIterator.getStarts('a'); | |
| 123 expectEqual("Characters with 'a' at the start of their decomposition: ", "",
CanonicalIterator.getStarts('a'), | |
| 124 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB
" | |
| 125 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1
EAF\u1EB1\u1EB3\u1EB5\u1EB7]") | |
| 126 ); | |
| 127 #endif | |
| 128 | |
| 129 // check permute | |
| 130 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed
to be sorted! | |
| 131 | |
| 132 Hashtable *permutations = new Hashtable(FALSE, status); | |
| 133 permutations->setValueDeleter(uprv_deleteUObject); | |
| 134 UnicodeString toPermute("ABC"); | |
| 135 | |
| 136 CanonicalIterator::permute(toPermute, FALSE, permutations, status); | |
| 137 | |
| 138 logln("testing permutation"); | |
| 139 | |
| 140 expectEqual("Simple permutation ", "", collectionToString(permutations), "AB
C, ACB, BAC, BCA, CAB, CBA"); | |
| 141 | |
| 142 delete permutations; | |
| 143 | |
| 144 // try samples | |
| 145 logln("testing samples"); | |
| 146 Hashtable *set = new Hashtable(FALSE, status); | |
| 147 set->setValueDeleter(uprv_deleteUObject); | |
| 148 int32_t i = 0; | |
| 149 CanonicalIterator it("", status); | |
| 150 if(U_SUCCESS(status)) { | |
| 151 for (i = 0; i < ARRAY_LENGTH(testArray); ++i) { | |
| 152 //logln("Results for: " + name.transliterate(testArray[i])); | |
| 153 UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); | |
| 154 it.setSource(testStr, status); | |
| 155 set->removeAll(); | |
| 156 for (;;) { | |
| 157 //UnicodeString *result = new UnicodeString(it.next()); | |
| 158 UnicodeString result(it.next()); | |
| 159 if (result.isBogus()) { | |
| 160 break; | |
| 161 } | |
| 162 set->put(result, new UnicodeString(result), status); // Add result
to the table | |
| 163 //logln(++counter + ": " + hex.transliterate(result)); | |
| 164 //logln(" = " + name.transliterate(result)); | |
| 165 } | |
| 166 expectEqual(i + UnicodeString(": "), testStr, collectionToString(set),
CharsToUnicodeString(testArray[i][1])); | |
| 167 | |
| 168 } | |
| 169 } else { | |
| 170 dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorNam
e(status)); | |
| 171 } | |
| 172 delete set; | |
| 173 } | |
| 174 | |
| 175 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, Canonica
lIterator &it) | |
| 176 { | |
| 177 UErrorCode status = U_ZERO_ERROR; | |
| 178 UnicodeString decomp, comp; | |
| 179 UBool gotDecomp = FALSE; | |
| 180 UBool gotComp = FALSE; | |
| 181 UBool gotSource = FALSE; | |
| 182 | |
| 183 Normalizer::decompose(s, FALSE, 0, decomp, status); | |
| 184 Normalizer::compose(s, FALSE, 0, comp, status); | |
| 185 | |
| 186 // skip characters that don't have either decomp. | |
| 187 // need quick test for this! | |
| 188 if (s == decomp && s == comp) { | |
| 189 return; | |
| 190 } | |
| 191 | |
| 192 it.setSource(s, status); | |
| 193 | |
| 194 for (;;) { | |
| 195 UnicodeString item = it.next(); | |
| 196 if (item.isBogus()) break; | |
| 197 if (item == s) gotSource = TRUE; | |
| 198 if (item == decomp) gotDecomp = TRUE; | |
| 199 if (item == comp) gotComp = TRUE; | |
| 200 } | |
| 201 | |
| 202 if (!gotSource || !gotDecomp || !gotComp) { | |
| 203 errln("FAIL CanonicalIterator: " + s + (int)ch); | |
| 204 } | |
| 205 } | |
| 206 | |
| 207 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const Unic
odeString &item, const UnicodeString &a, const UnicodeString &b) { | |
| 208 if (!(a==b)) { | |
| 209 errln("FAIL: " + message + getReadable(item)); | |
| 210 errln("\t" + getReadable(a)); | |
| 211 errln("\t" + getReadable(b)); | |
| 212 } else { | |
| 213 logln("Checked: " + message + getReadable(item)); | |
| 214 logln("\t" + getReadable(a)); | |
| 215 logln("\t" + getReadable(b)); | |
| 216 } | |
| 217 } | |
| 218 | |
| 219 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { | |
| 220 UErrorCode status = U_ZERO_ERROR; | |
| 221 UnicodeString result = "["; | |
| 222 if (s.length() == 0) return ""; | |
| 223 // set up for readable display | |
| 224 #if !UCONFIG_NO_TRANSLITERATION | |
| 225 if(verbose) { | |
| 226 if (nameTrans == NULL) | |
| 227 nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRA
NS_FORWARD, status); | |
| 228 UnicodeString sName = s; | |
| 229 nameTrans->transliterate(sName); | |
| 230 result += sName; | |
| 231 result += ";"; | |
| 232 } | |
| 233 if (hexTrans == NULL) | |
| 234 hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_F
ORWARD, status); | |
| 235 #endif | |
| 236 UnicodeString sHex = s; | |
| 237 #if !UCONFIG_NO_TRANSLITERATION | |
| 238 if(hexTrans) { // maybe there is no data and transliterator cannot be instan
tiated | |
| 239 hexTrans->transliterate(sHex); | |
| 240 } | |
| 241 #endif | |
| 242 result += sHex; | |
| 243 result += "]"; | |
| 244 return result; | |
| 245 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transli
terate(s) + "]"; | |
| 246 } | |
| 247 | |
| 248 U_CFUNC int U_CALLCONV | |
| 249 compareUnicodeStrings(const void *s1, const void *s2) { | |
| 250 UnicodeString **st1 = (UnicodeString **)s1; | |
| 251 UnicodeString **st2 = (UnicodeString **)s2; | |
| 252 | |
| 253 return (*st1)->compare(**st2); | |
| 254 } | |
| 255 | |
| 256 | |
| 257 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { | |
| 258 UnicodeString result; | |
| 259 | |
| 260 // Iterate over the Hashtable, then qsort. | |
| 261 | |
| 262 UnicodeString **resArray = new UnicodeString*[col->count()]; | |
| 263 int32_t i = 0; | |
| 264 | |
| 265 const UHashElement *ne = NULL; | |
| 266 int32_t el = UHASH_FIRST; | |
| 267 //Iterator it = basic.iterator(); | |
| 268 ne = col->nextElement(el); | |
| 269 //while (it.hasNext()) | |
| 270 while (ne != NULL) { | |
| 271 //String item = (String) it.next(); | |
| 272 UnicodeString *item = (UnicodeString *)(ne->value.pointer); | |
| 273 resArray[i++] = item; | |
| 274 ne = col->nextElement(el); | |
| 275 } | |
| 276 | |
| 277 for(i = 0; i<col->count(); ++i) { | |
| 278 logln(*resArray[i]); | |
| 279 } | |
| 280 | |
| 281 qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings
); | |
| 282 | |
| 283 result = *resArray[0]; | |
| 284 | |
| 285 for(i = 1; i<col->count(); ++i) { | |
| 286 result += ", "; | |
| 287 result += *resArray[i]; | |
| 288 } | |
| 289 | |
| 290 /* | |
| 291 Iterator it = col.iterator(); | |
| 292 while (it.hasNext()) { | |
| 293 if (result.length() != 0) result.append(", "); | |
| 294 result.append(it.next().toString()); | |
| 295 } | |
| 296 */ | |
| 297 | |
| 298 delete [] resArray; | |
| 299 | |
| 300 return result; | |
| 301 } | |
| 302 | |
| 303 void CanonicalIteratorTest::TestAPI() { | |
| 304 UErrorCode status = U_ZERO_ERROR; | |
| 305 // Test reset and getSource | |
| 306 UnicodeString start("ljubav"); | |
| 307 logln("Testing CanonicalIterator::getSource"); | |
| 308 logln("Instantiating canonical iterator with string "+start); | |
| 309 CanonicalIterator can(start, status); | |
| 310 if (U_FAILURE(status)) { | |
| 311 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); | |
| 312 return; | |
| 313 } | |
| 314 UnicodeString source = can.getSource(); | |
| 315 logln("CanonicalIterator::getSource returned "+source); | |
| 316 if(start != source) { | |
| 317 errln("CanonicalIterator.getSource() didn't return the starting string. Expe
cted "+start+", got "+source); | |
| 318 } | |
| 319 logln("Testing CanonicalIterator::reset"); | |
| 320 UnicodeString next = can.next(); | |
| 321 logln("CanonicalIterator::next returned "+next); | |
| 322 | |
| 323 can.reset(); | |
| 324 | |
| 325 UnicodeString afterReset = can.next(); | |
| 326 logln("After reset, CanonicalIterator::next returned "+afterReset); | |
| 327 | |
| 328 if(next != afterReset) { | |
| 329 errln("Next after instantiation ("+next+") is different from next after rese
t ("+afterReset+")."); | |
| 330 } | |
| 331 | |
| 332 logln("Testing getStaticClassID and getDynamicClassID"); | |
| 333 if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ | |
| 334 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticCla
ssID"); | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
| OLD | NEW |