OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 2002-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ******************************************************************** |
| 6 * |
| 7 * @author Mark E. Davis |
| 8 * @author Vladimir Weinstein |
| 9 */ |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 |
| 13 #if !UCONFIG_NO_NORMALIZATION |
| 14 |
| 15 #include "intltest.h" |
| 16 #include "cstring.h" |
| 17 #include "canittst.h" |
| 18 #include "unicode/caniter.h" |
| 19 #include "unicode/normlzr.h" |
| 20 #include "unicode/uchar.h" |
| 21 #include "hash.h" |
| 22 |
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The argument is fully parenthesised so that any array-valued expression
 * can be passed safely.
 */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
| 24 |
/**
 * Expands to one `case` label of the dispatch switch in runIndexedTest().
 * It stores the stringized test name in `name` and, only when `exec` is set,
 * logs a header line plus an empty line and then calls the test method.
 * Relies on `name`, `exec` and logln() being in scope at the expansion site.
 * The trailing `break` has no semicolon; the invocation supplies it.
 */
#define CASE(id,test) case id:                          \
                          name = #test;                 \
                          if (exec) {                   \
                              logln(#test "---");       \
                              logln((UnicodeString)""); \
                              test();                   \
                          }                             \
                          break
| 33 |
/**
 * Maps a zero-based test index onto one of this class's test methods.
 *
 * @param index  which test to select (0 = TestBasic, 1 = TestExhaustive,
 *               2 = TestAPI)
 * @param exec   when true the selected test is actually run; when false only
 *               `name` is filled in (see the CASE macro)
 * @param name   out: the selected test's name, or "" for any other index
 *
 * The last parameter is unnamed and unused here.
 */
void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
                                           const char* &name, char* /*par*/) {
    switch (index) {
        CASE(0, TestBasic);
        CASE(1, TestExhaustive);
        CASE(2, TestAPI);
        default: name = ""; break;  // no test at this index
    }
}
| 43 |
/*
 * Disabled helper, kept for reference only: converts Java-style strings with
 * \u Unicode escapes into UnicodeString objects.
 *
static UnicodeString str(const char *input)
{
    UnicodeString str(input, ""); // Invariant conversion
    return str.unescape();
}
*/
| 52 |
| 53 |
| 54 CanonicalIteratorTest::CanonicalIteratorTest() : |
| 55 nameTrans(NULL), hexTrans(NULL) |
| 56 { |
| 57 } |
| 58 |
| 59 CanonicalIteratorTest::~CanonicalIteratorTest() |
| 60 { |
| 61 #if !UCONFIG_NO_TRANSLITERATION |
| 62 if(nameTrans != NULL) { |
| 63 delete(nameTrans); |
| 64 } |
| 65 if(hexTrans != NULL) { |
| 66 delete(hexTrans); |
| 67 } |
| 68 #endif |
| 69 } |
| 70 |
| 71 void CanonicalIteratorTest::TestExhaustive() { |
| 72 UErrorCode status = U_ZERO_ERROR; |
| 73 CanonicalIterator it("", status); |
| 74 if (U_FAILURE(status)) { |
| 75 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); |
| 76 return; |
| 77 } |
| 78 UChar32 i = 0; |
| 79 UnicodeString s; |
| 80 // Test static and dynamic class IDs |
| 81 if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ |
| 82 errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDyna
micClassID"); |
| 83 } |
| 84 for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { |
| 85 //for (i = 0xae00; i < 0xaf00; ++i) { |
| 86 |
| 87 if ((i % 0x100) == 0) { |
| 88 logln("Testing U+%06X", i); |
| 89 } |
| 90 |
| 91 // skip characters we know don't have decomps |
| 92 int8_t type = u_charType(i); |
| 93 if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR |
| 94 || type == U_SURROGATE) continue; |
| 95 |
| 96 s = i; |
| 97 characterTest(s, i, it); |
| 98 |
| 99 s += (UChar32)0x0345; //"\\u0345"; |
| 100 characterTest(s, i, it); |
| 101 } |
| 102 } |
| 103 |
| 104 void CanonicalIteratorTest::TestBasic() { |
| 105 |
| 106 UErrorCode status = U_ZERO_ERROR; |
| 107 |
| 108 static const char * const testArray[][2] = { |
| 109 {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u
0307, A\\u030A\\u1E0B\\u0327, " |
| 110 "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0
307, " |
| 111 "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u032
7, " |
| 112 "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u030
7"}, |
| 113 {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C,
\\u010D\\u017E"}, |
| 114 {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, |
| 115 }; |
| 116 |
| 117 #if 0 |
| 118 // This is not interesting for C/C++ as the data is already built beforehand |
| 119 // check build |
| 120 UnicodeSet ss = CanonicalIterator.getSafeStart(); |
| 121 logln("Safe Start: " + ss.toPattern(true)); |
| 122 ss = CanonicalIterator.getStarts('a'); |
| 123 expectEqual("Characters with 'a' at the start of their decomposition: ", "",
CanonicalIterator.getStarts('a'), |
| 124 new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB
" |
| 125 + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1
EAF\u1EB1\u1EB3\u1EB5\u1EB7]") |
| 126 ); |
| 127 #endif |
| 128 |
| 129 // check permute |
| 130 // NOTE: we use a TreeSet below to sort the output, which is not guaranteed
to be sorted! |
| 131 |
| 132 Hashtable *permutations = new Hashtable(FALSE, status); |
| 133 permutations->setValueDeleter(uhash_deleteUnicodeString); |
| 134 UnicodeString toPermute("ABC"); |
| 135 |
| 136 CanonicalIterator::permute(toPermute, FALSE, permutations, status); |
| 137 |
| 138 logln("testing permutation"); |
| 139 |
| 140 expectEqual("Simple permutation ", "", collectionToString(permutations), "AB
C, ACB, BAC, BCA, CAB, CBA"); |
| 141 |
| 142 delete permutations; |
| 143 |
| 144 // try samples |
| 145 logln("testing samples"); |
| 146 Hashtable *set = new Hashtable(FALSE, status); |
| 147 set->setValueDeleter(uhash_deleteUnicodeString); |
| 148 int32_t i = 0; |
| 149 CanonicalIterator it("", status); |
| 150 if(U_SUCCESS(status)) { |
| 151 for (i = 0; i < ARRAY_LENGTH(testArray); ++i) { |
| 152 //logln("Results for: " + name.transliterate(testArray[i])); |
| 153 UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); |
| 154 it.setSource(testStr, status); |
| 155 set->removeAll(); |
| 156 for (;;) { |
| 157 //UnicodeString *result = new UnicodeString(it.next()); |
| 158 UnicodeString result(it.next()); |
| 159 if (result.isBogus()) { |
| 160 break; |
| 161 } |
| 162 set->put(result, new UnicodeString(result), status); // Add result
to the table |
| 163 //logln(++counter + ": " + hex.transliterate(result)); |
| 164 //logln(" = " + name.transliterate(result)); |
| 165 } |
| 166 expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicode
String(testArray[i][1])); |
| 167 |
| 168 } |
| 169 } else { |
| 170 dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorNam
e(status)); |
| 171 } |
| 172 delete set; |
| 173 } |
| 174 |
| 175 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, Canonica
lIterator &it) |
| 176 { |
| 177 UErrorCode status = U_ZERO_ERROR; |
| 178 UnicodeString decomp, comp; |
| 179 UBool gotDecomp = FALSE; |
| 180 UBool gotComp = FALSE; |
| 181 UBool gotSource = FALSE; |
| 182 |
| 183 Normalizer::decompose(s, FALSE, 0, decomp, status); |
| 184 Normalizer::compose(s, FALSE, 0, comp, status); |
| 185 |
| 186 // skip characters that don't have either decomp. |
| 187 // need quick test for this! |
| 188 if (s == decomp && s == comp) { |
| 189 return; |
| 190 } |
| 191 |
| 192 it.setSource(s, status); |
| 193 |
| 194 for (;;) { |
| 195 UnicodeString item = it.next(); |
| 196 if (item.isBogus()) break; |
| 197 if (item == s) gotSource = TRUE; |
| 198 if (item == decomp) gotDecomp = TRUE; |
| 199 if (item == comp) gotComp = TRUE; |
| 200 } |
| 201 |
| 202 if (!gotSource || !gotDecomp || !gotComp) { |
| 203 errln("FAIL CanonicalIterator: " + s + (int)ch); |
| 204 } |
| 205 } |
| 206 |
| 207 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const Unic
odeString &item, const UnicodeString &a, const UnicodeString &b) { |
| 208 if (!(a==b)) { |
| 209 errln("FAIL: " + message + getReadable(item)); |
| 210 errln("\t" + getReadable(a)); |
| 211 errln("\t" + getReadable(b)); |
| 212 } else { |
| 213 logln("Checked: " + message + getReadable(item)); |
| 214 logln("\t" + getReadable(a)); |
| 215 logln("\t" + getReadable(b)); |
| 216 } |
| 217 } |
| 218 |
| 219 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { |
| 220 UErrorCode status = U_ZERO_ERROR; |
| 221 UnicodeString result = "["; |
| 222 if (s.length() == 0) return ""; |
| 223 // set up for readable display |
| 224 #if !UCONFIG_NO_TRANSLITERATION |
| 225 if(verbose) { |
| 226 if (nameTrans == NULL) |
| 227 nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRA
NS_FORWARD, status); |
| 228 UnicodeString sName = s; |
| 229 nameTrans->transliterate(sName); |
| 230 result += sName; |
| 231 result += ";"; |
| 232 } |
| 233 if (hexTrans == NULL) |
| 234 hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_F
ORWARD, status); |
| 235 #endif |
| 236 UnicodeString sHex = s; |
| 237 #if !UCONFIG_NO_TRANSLITERATION |
| 238 if(hexTrans) { // maybe there is no data and transliterator cannot be instan
tiated |
| 239 hexTrans->transliterate(sHex); |
| 240 } |
| 241 #endif |
| 242 result += sHex; |
| 243 result += "]"; |
| 244 return result; |
| 245 //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transli
terate(s) + "]"; |
| 246 } |
| 247 |
| 248 U_CFUNC int U_CALLCONV |
| 249 compareUnicodeStrings(const void *s1, const void *s2) { |
| 250 UnicodeString **st1 = (UnicodeString **)s1; |
| 251 UnicodeString **st2 = (UnicodeString **)s2; |
| 252 |
| 253 return (*st1)->compare(**st2); |
| 254 } |
| 255 |
| 256 |
| 257 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { |
| 258 UnicodeString result; |
| 259 |
| 260 // Iterate over the Hashtable, then qsort. |
| 261 |
| 262 UnicodeString **resArray = new UnicodeString*[col->count()]; |
| 263 int32_t i = 0; |
| 264 |
| 265 const UHashElement *ne = NULL; |
| 266 int32_t el = -1; |
| 267 //Iterator it = basic.iterator(); |
| 268 ne = col->nextElement(el); |
| 269 //while (it.hasNext()) |
| 270 while (ne != NULL) { |
| 271 //String item = (String) it.next(); |
| 272 UnicodeString *item = (UnicodeString *)(ne->value.pointer); |
| 273 resArray[i++] = item; |
| 274 ne = col->nextElement(el); |
| 275 } |
| 276 |
| 277 for(i = 0; i<col->count(); ++i) { |
| 278 logln(*resArray[i]); |
| 279 } |
| 280 |
| 281 qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings
); |
| 282 |
| 283 result = *resArray[0]; |
| 284 |
| 285 for(i = 1; i<col->count(); ++i) { |
| 286 result += ", "; |
| 287 result += *resArray[i]; |
| 288 } |
| 289 |
| 290 /* |
| 291 Iterator it = col.iterator(); |
| 292 while (it.hasNext()) { |
| 293 if (result.length() != 0) result.append(", "); |
| 294 result.append(it.next().toString()); |
| 295 } |
| 296 */ |
| 297 |
| 298 delete [] resArray; |
| 299 |
| 300 return result; |
| 301 } |
| 302 |
| 303 void CanonicalIteratorTest::TestAPI() { |
| 304 UErrorCode status = U_ZERO_ERROR; |
| 305 // Test reset and getSource |
| 306 UnicodeString start("ljubav"); |
| 307 logln("Testing CanonicalIterator::getSource"); |
| 308 logln("Instantiating canonical iterator with string "+start); |
| 309 CanonicalIterator can(start, status); |
| 310 if (U_FAILURE(status)) { |
| 311 dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); |
| 312 return; |
| 313 } |
| 314 UnicodeString source = can.getSource(); |
| 315 logln("CanonicalIterator::getSource returned "+source); |
| 316 if(start != source) { |
| 317 errln("CanonicalIterator.getSource() didn't return the starting string. Expe
cted "+start+", got "+source); |
| 318 } |
| 319 logln("Testing CanonicalIterator::reset"); |
| 320 UnicodeString next = can.next(); |
| 321 logln("CanonicalIterator::next returned "+next); |
| 322 |
| 323 can.reset(); |
| 324 |
| 325 UnicodeString afterReset = can.next(); |
| 326 logln("After reset, CanonicalIterator::next returned "+afterReset); |
| 327 |
| 328 if(next != afterReset) { |
| 329 errln("Next after instantiation ("+next+") is different from next after rese
t ("+afterReset+")."); |
| 330 } |
| 331 |
| 332 logln("Testing getStaticClassID and getDynamicClassID"); |
| 333 if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ |
| 334 errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticCla
ssID"); |
| 335 } |
| 336 } |
| 337 |
| 338 #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
OLD | NEW |