| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * Copyright (C) 2010-2014, International Business Machines | |
| 4 * Corporation and others. All Rights Reserved. | |
| 5 ******************************************************************************* | |
| 6 * file name: ucharstrietest.cpp | |
| 7 * encoding: US-ASCII | |
| 8 * tab size: 8 (not used) | |
| 9 * indentation:4 | |
| 10 * | |
| 11 * created on: 2010nov16 | |
| 12 * created by: Markus W. Scherer | |
| 13 */ | |
| 14 | |
| 15 #include <string.h> | |
| 16 | |
| 17 #include "unicode/utypes.h" | |
| 18 #include "unicode/appendable.h" | |
| 19 #include "unicode/localpointer.h" | |
| 20 #include "unicode/ucharstrie.h" | |
| 21 #include "unicode/ucharstriebuilder.h" | |
| 22 #include "unicode/uniset.h" | |
| 23 #include "unicode/unistr.h" | |
| 24 #include "intltest.h" | |
| 25 #include "cmemory.h" | |
| 26 | |
// One expected trie entry: a key string and the 32-bit value stored for it.
// buildTrie() and checkFirst() pass the key through UnicodeString::unescape(),
// so keys in the data tables may contain backslash escapes.
struct StringAndValue {
    const char *s;      // key, as a NUL-terminated char string
    int32_t     value;  // value associated with the key
};
| 31 | |
| 32 class UCharsTrieTest : public IntlTest { | |
| 33 public: | |
| 34 UCharsTrieTest(); | |
| 35 virtual ~UCharsTrieTest(); | |
| 36 | |
| 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=
NULL); | |
| 38 void TestBuilder(); | |
| 39 void TestEmpty(); | |
| 40 void Test_a(); | |
| 41 void Test_a_ab(); | |
| 42 void TestShortestBranch(); | |
| 43 void TestBranches(); | |
| 44 void TestLongSequence(); | |
| 45 void TestLongBranch(); | |
| 46 void TestValuesForState(); | |
| 47 void TestCompact(); | |
| 48 void TestFirstForCodePoint(); | |
| 49 void TestNextForCodePoint(); | |
| 50 | |
| 51 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); | |
| 52 void TestLargeTrie(); | |
| 53 | |
| 54 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); | |
| 55 void TestHasUniqueValue(); | |
| 56 void TestGetNextUChars(); | |
| 57 void TestIteratorFromBranch(); | |
| 58 void TestIteratorFromLinearMatch(); | |
| 59 void TestTruncatingIteratorFromRoot(); | |
| 60 void TestTruncatingIteratorFromLinearMatchShort(); | |
| 61 void TestTruncatingIteratorFromLinearMatchLong(); | |
| 62 void TestIteratorFromUChars(); | |
| 63 | |
| 64 void checkData(const StringAndValue data[], int32_t dataLength); | |
| 65 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieB
uildOption buildOption); | |
| 66 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, | |
| 67 UStringTrieBuildOption buildOption); | |
| 68 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataL
ength); | |
| 69 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLe
ngth); | |
| 70 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32
_t dataLength); | |
| 71 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t
dataLength); | |
| 72 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t da
taLength); | |
| 73 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[],
int32_t dataLength); | |
| 74 | |
| 75 private: | |
| 76 UCharsTrieBuilder *builder_; | |
| 77 }; | |
| 78 | |
| 79 extern IntlTest *createUCharsTrieTest() { | |
| 80 return new UCharsTrieTest(); | |
| 81 } | |
| 82 | |
| 83 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { | |
| 84 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); | |
| 85 builder_=new UCharsTrieBuilder(errorCode); | |
| 86 } | |
| 87 | |
| 88 UCharsTrieTest::~UCharsTrieTest() { | |
| 89 delete builder_; | |
| 90 } | |
| 91 | |
| 92 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name
, char * /*par*/) { | |
| 93 if(exec) { | |
| 94 logln("TestSuite UCharsTrieTest: "); | |
| 95 } | |
| 96 TESTCASE_AUTO_BEGIN; | |
| 97 TESTCASE_AUTO(TestBuilder); | |
| 98 TESTCASE_AUTO(TestEmpty); | |
| 99 TESTCASE_AUTO(Test_a); | |
| 100 TESTCASE_AUTO(Test_a_ab); | |
| 101 TESTCASE_AUTO(TestShortestBranch); | |
| 102 TESTCASE_AUTO(TestBranches); | |
| 103 TESTCASE_AUTO(TestLongSequence); | |
| 104 TESTCASE_AUTO(TestLongBranch); | |
| 105 TESTCASE_AUTO(TestValuesForState); | |
| 106 TESTCASE_AUTO(TestCompact); | |
| 107 TESTCASE_AUTO(TestFirstForCodePoint); | |
| 108 TESTCASE_AUTO(TestNextForCodePoint); | |
| 109 TESTCASE_AUTO(TestLargeTrie); | |
| 110 TESTCASE_AUTO(TestHasUniqueValue); | |
| 111 TESTCASE_AUTO(TestGetNextUChars); | |
| 112 TESTCASE_AUTO(TestIteratorFromBranch); | |
| 113 TESTCASE_AUTO(TestIteratorFromLinearMatch); | |
| 114 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); | |
| 115 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); | |
| 116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); | |
| 117 TESTCASE_AUTO(TestIteratorFromUChars); | |
| 118 TESTCASE_AUTO_END; | |
| 119 } | |
| 120 | |
| 121 void UCharsTrieTest::TestBuilder() { | |
| 122 IcuTestErrorCode errorCode(*this, "TestBuilder()"); | |
| 123 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); | |
| 124 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { | |
| 125 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR
"); | |
| 126 return; | |
| 127 } | |
| 128 // TODO: remove .build(...) once add() checks for duplicates. | |
| 129 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BU
ILD_FAST, errorCode); | |
| 130 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { | |
| 131 errln("UCharsTrieBuilder.add() did not detect duplicates"); | |
| 132 return; | |
| 133 } | |
| 134 } | |
| 135 | |
| 136 void UCharsTrieTest::TestEmpty() { | |
| 137 static const StringAndValue data[]={ | |
| 138 { "", 0 } | |
| 139 }; | |
| 140 checkData(data, UPRV_LENGTHOF(data)); | |
| 141 } | |
| 142 | |
| 143 void UCharsTrieTest::Test_a() { | |
| 144 static const StringAndValue data[]={ | |
| 145 { "a", 1 } | |
| 146 }; | |
| 147 checkData(data, UPRV_LENGTHOF(data)); | |
| 148 } | |
| 149 | |
| 150 void UCharsTrieTest::Test_a_ab() { | |
| 151 static const StringAndValue data[]={ | |
| 152 { "a", 1 }, | |
| 153 { "ab", 100 } | |
| 154 }; | |
| 155 checkData(data, UPRV_LENGTHOF(data)); | |
| 156 } | |
| 157 | |
| 158 void UCharsTrieTest::TestShortestBranch() { | |
| 159 static const StringAndValue data[]={ | |
| 160 { "a", 1000 }, | |
| 161 { "b", 2000 } | |
| 162 }; | |
| 163 checkData(data, UPRV_LENGTHOF(data)); | |
| 164 } | |
| 165 | |
| 166 void UCharsTrieTest::TestBranches() { | |
| 167 static const StringAndValue data[]={ | |
| 168 { "a", 0x10 }, | |
| 169 { "cc", 0x40 }, | |
| 170 { "e", 0x100 }, | |
| 171 { "ggg", 0x400 }, | |
| 172 { "i", 0x1000 }, | |
| 173 { "kkkk", 0x4000 }, | |
| 174 { "n", 0x10000 }, | |
| 175 { "ppppp", 0x40000 }, | |
| 176 { "r", 0x100000 }, | |
| 177 { "sss", 0x200000 }, | |
| 178 { "t", 0x400000 }, | |
| 179 { "uu", 0x800000 }, | |
| 180 { "vv", 0x7fffffff }, | |
| 181 { "zz", (int32_t)0x80000000 } | |
| 182 }; | |
| 183 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { | |
| 184 logln("TestBranches length=%d", (int)length); | |
| 185 checkData(data, length); | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 void UCharsTrieTest::TestLongSequence() { | |
| 190 static const StringAndValue data[]={ | |
| 191 { "a", -1 }, | |
| 192 // sequence of linear-match nodes | |
| 193 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, | |
| 194 // more than 256 units | |
| 195 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 196 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } | |
| 201 }; | |
| 202 checkData(data, UPRV_LENGTHOF(data)); | |
| 203 } | |
| 204 | |
| 205 void UCharsTrieTest::TestLongBranch() { | |
| 206 // Split-branch and interesting compact-integer values. | |
| 207 static const StringAndValue data[]={ | |
| 208 { "a", -2 }, | |
| 209 { "b", -1 }, | |
| 210 { "c", 0 }, | |
| 211 { "d2", 1 }, | |
| 212 { "f", 0x3f }, | |
| 213 { "g", 0x40 }, | |
| 214 { "h", 0x41 }, | |
| 215 { "j23", 0x1900 }, | |
| 216 { "j24", 0x19ff }, | |
| 217 { "j25", 0x1a00 }, | |
| 218 { "k2", 0x1a80 }, | |
| 219 { "k3", 0x1aff }, | |
| 220 { "l234567890", 0x1b00 }, | |
| 221 { "l234567890123", 0x1b01 }, | |
| 222 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, | |
| 223 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, | |
| 224 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, | |
| 225 { "r", 0x333333 }, | |
| 226 { "s2345", 0x4444444 }, | |
| 227 { "t234567890", 0x77777777 }, | |
| 228 { "z", (int32_t)0x80000001 } | |
| 229 }; | |
| 230 checkData(data, UPRV_LENGTHOF(data)); | |
| 231 } | |
| 232 | |
| 233 void UCharsTrieTest::TestValuesForState() { | |
| 234 // Check that saveState() and resetToState() interact properly | |
| 235 // with next() and current(). | |
| 236 static const StringAndValue data[]={ | |
| 237 { "a", -1 }, | |
| 238 { "ab", -2 }, | |
| 239 { "abc", -3 }, | |
| 240 { "abcd", -4 }, | |
| 241 { "abcde", -5 }, | |
| 242 { "abcdef", -6 } | |
| 243 }; | |
| 244 checkData(data, UPRV_LENGTHOF(data)); | |
| 245 } | |
| 246 | |
| 247 void UCharsTrieTest::TestCompact() { | |
| 248 // Duplicate trailing strings and values provide opportunities for compactin
g. | |
| 249 static const StringAndValue data[]={ | |
| 250 { "+", 0 }, | |
| 251 { "+august", 8 }, | |
| 252 { "+december", 12 }, | |
| 253 { "+july", 7 }, | |
| 254 { "+june", 6 }, | |
| 255 { "+november", 11 }, | |
| 256 { "+october", 10 }, | |
| 257 { "+september", 9 }, | |
| 258 { "-", 0 }, | |
| 259 { "-august", 8 }, | |
| 260 { "-december", 12 }, | |
| 261 { "-july", 7 }, | |
| 262 { "-june", 6 }, | |
| 263 { "-november", 11 }, | |
| 264 { "-october", 10 }, | |
| 265 { "-september", 9 }, | |
| 266 // The l+n branch (with its sub-nodes) is a duplicate but will be writte
n | |
| 267 // both times because each time it follows a different linear-match node
. | |
| 268 { "xjuly", 7 }, | |
| 269 { "xjune", 6 } | |
| 270 }; | |
| 271 checkData(data, UPRV_LENGTHOF(data)); | |
| 272 } | |
| 273 | |
| 274 void UCharsTrieTest::TestFirstForCodePoint() { | |
| 275 static const StringAndValue data[]={ | |
| 276 { "a", 1 }, | |
| 277 { "a\\ud800", 2 }, | |
| 278 { "a\\U00010000", 3 }, | |
| 279 { "\\ud840", 4 }, | |
| 280 { "\\U00020000\\udbff", 5 }, | |
| 281 { "\\U00020000\\U0010ffff", 6 }, | |
| 282 { "\\U00020000\\U0010ffffz", 7 }, | |
| 283 { "\\U00050000xy", 8 }, | |
| 284 { "\\U00050000xyz", 9 } | |
| 285 }; | |
| 286 checkData(data, UPRV_LENGTHOF(data)); | |
| 287 } | |
| 288 | |
| 289 void UCharsTrieTest::TestNextForCodePoint() { | |
| 290 static const StringAndValue data[]={ | |
| 291 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }
, | |
| 292 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, | |
| 293 { "\\u4dff\\U000103ff", 99999 } | |
| 294 }; | |
| 295 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTR
IE_BUILD_FAST)); | |
| 296 if(trie.isNull()) { | |
| 297 return; // buildTrie() reported an error | |
| 298 } | |
| 299 UStringTrieResult result; | |
| 300 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!
=trie->current() || | |
| 301 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result
!=trie->current() || | |
| 302 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!
=trie->current() || | |
| 303 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result
!=trie->current() || | |
| 304 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!
=trie->current() || | |
| 305 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || re
sult!=trie->current() || | |
| 306 trie->getValue()!=2000000000 | |
| 307 ) { | |
| 308 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); | |
| 309 } | |
| 310 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result
!=trie->current() || | |
| 311 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result
!=trie->current() || | |
| 312 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!
=trie->current() || | |
| 313 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || res
ult!=trie->current() || | |
| 314 trie->getValue()!=44444 | |
| 315 ) { | |
| 316 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); | |
| 317 } | |
| 318 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE ||
result!=trie->current() || | |
| 319 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result
!=trie->current() || | |
| 320 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!
=trie->current() || | |
| 321 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result
!=trie->current() // no match for trail surrogate | |
| 322 ) { | |
| 323 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999
\\U00020222"); | |
| 324 } | |
| 325 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE ||
result!=trie->current() || | |
| 326 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || res
ult!=trie->current() || | |
| 327 trie->getValue()!=99999 | |
| 328 ) { | |
| 329 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 // Definitions in the anonymous namespace are invisible outside this file. | |
| 334 namespace { | |
| 335 | |
| 336 // Generate (string, value) pairs. | |
| 337 // The first string (before next()) will be empty. | |
| 338 class Generator { | |
| 339 public: | |
| 340 Generator() : value(4711), num(0) {} | |
| 341 void next() { | |
| 342 UChar c; | |
| 343 s.truncate(0); | |
| 344 s.append(c=(UChar)(value>>16)); | |
| 345 s.append((UChar)(value>>4)); | |
| 346 if(value&1) { | |
| 347 s.append((UChar)value); | |
| 348 } | |
| 349 set.add(c); | |
| 350 value+=((value>>5)&0x7ff)*3+1; | |
| 351 ++num; | |
| 352 } | |
| 353 const UnicodeString &getString() const { return s; } | |
| 354 int32_t getValue() const { return value; } | |
| 355 int32_t countUniqueFirstChars() const { return set.size(); } | |
| 356 int32_t getIndex() const { return num; } | |
| 357 | |
| 358 private: | |
| 359 UnicodeString s; | |
| 360 UnicodeSet set; | |
| 361 int32_t value; | |
| 362 int32_t num; | |
| 363 }; | |
| 364 | |
| 365 } // end namespace | |
| 366 | |
| 367 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { | |
| 368 IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); | |
| 369 Generator gen; | |
| 370 builder_->clear(); | |
| 371 while(gen.countUniqueFirstChars()<numUniqueFirst) { | |
| 372 builder_->add(gen.getString(), gen.getValue(), errorCode); | |
| 373 gen.next(); | |
| 374 } | |
| 375 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)g
en.getIndex()); | |
| 376 UnicodeString trieUChars; | |
| 377 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
| 378 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
| 379 return new UCharsTrie(trieUChars.getBuffer()); | |
| 380 } | |
| 381 | |
| 382 // Exercise a large branch node. | |
| 383 void UCharsTrieTest::TestLargeTrie() { | |
| 384 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); | |
| 385 if(trie.isNull()) { | |
| 386 return; // buildTrie() reported an error | |
| 387 } | |
| 388 Generator gen; | |
| 389 while(gen.countUniqueFirstChars()<1111) { | |
| 390 UnicodeString x(gen.getString()); | |
| 391 int32_t value=gen.getValue(); | |
| 392 if(!x.isEmpty()) { | |
| 393 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { | |
| 394 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string
%ld\n", | |
| 395 x[0], (long)gen.getIndex()); | |
| 396 break; | |
| 397 } | |
| 398 x.remove(0, 1); | |
| 399 } | |
| 400 UStringTrieResult result=trie->next(x.getBuffer(), x.length()); | |
| 401 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=t
rie->getValue()) { | |
| 402 errln("next(%d chars U+%04X U+%04X)!=hasValue or " | |
| 403 "next()!=current() or getValue() wrong " | |
| 404 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.get
Index()); | |
| 405 break; | |
| 406 } | |
| 407 gen.next(); | |
| 408 } | |
| 409 } | |
| 410 | |
// Numeric code unit constants for the lowercase letters that the tests
// below feed to the trie one unit at a time.
enum {
    u_a=0x61,  // a
    u_b=0x62,  // b
    u_c=0x63,  // c
    u_j=0x6a,  // j
    u_n=0x6e,  // n
    u_r=0x72,  // r
    u_u=0x75,  // u
    u_y=0x79   // y
};
| 421 | |
| 422 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption)
{ | |
| 423 // All types of nodes leading to the same value, | |
| 424 // for code coverage of recursive functions. | |
| 425 // In particular, we need a lot of branches on some single level | |
| 426 // to exercise a split-branch node. | |
| 427 static const StringAndValue data[]={ | |
| 428 { "august", 8 }, | |
| 429 { "jan", 1 }, | |
| 430 { "jan.", 1 }, | |
| 431 { "jana", 1 }, | |
| 432 { "janbb", 1 }, | |
| 433 { "janc", 1 }, | |
| 434 { "janddd", 1 }, | |
| 435 { "janee", 1 }, | |
| 436 { "janef", 1 }, | |
| 437 { "janf", 1 }, | |
| 438 { "jangg", 1 }, | |
| 439 { "janh", 1 }, | |
| 440 { "janiiii", 1 }, | |
| 441 { "janj", 1 }, | |
| 442 { "jankk", 1 }, | |
| 443 { "jankl", 1 }, | |
| 444 { "jankmm", 1 }, | |
| 445 { "janl", 1 }, | |
| 446 { "janm", 1 }, | |
| 447 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
| 448 { "jano", 1 }, | |
| 449 { "janpp", 1 }, | |
| 450 { "janqqq", 1 }, | |
| 451 { "janr", 1 }, | |
| 452 { "januar", 1 }, | |
| 453 { "january", 1 }, | |
| 454 { "july", 7 }, | |
| 455 { "jun", 6 }, | |
| 456 { "jun.", 6 }, | |
| 457 { "june", 6 } | |
| 458 }; | |
| 459 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); | |
| 460 } | |
| 461 | |
| 462 void UCharsTrieTest::TestHasUniqueValue() { | |
| 463 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
| 464 if(trie.isNull()) { | |
| 465 return; // buildTrie() reported an error | |
| 466 } | |
| 467 int32_t uniqueValue; | |
| 468 if(trie->hasUniqueValue(uniqueValue)) { | |
| 469 errln("unique value at root"); | |
| 470 } | |
| 471 trie->next(u_j); | |
| 472 trie->next(u_a); | |
| 473 trie->next(u_n); | |
| 474 // hasUniqueValue() directly after next() | |
| 475 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { | |
| 476 errln("not unique value 1 after \"jan\""); | |
| 477 } | |
| 478 trie->first(u_j); | |
| 479 trie->next(u_u); | |
| 480 if(trie->hasUniqueValue(uniqueValue)) { | |
| 481 errln("unique value after \"ju\""); | |
| 482 } | |
| 483 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { | |
| 484 errln("not normal value 6 after \"jun\""); | |
| 485 } | |
| 486 // hasUniqueValue() after getValue() | |
| 487 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { | |
| 488 errln("not unique value 6 after \"jun\""); | |
| 489 } | |
| 490 // hasUniqueValue() from within a linear-match node | |
| 491 trie->first(u_a); | |
| 492 trie->next(u_u); | |
| 493 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { | |
| 494 errln("not unique value 8 after \"au\""); | |
| 495 } | |
| 496 } | |
| 497 | |
| 498 void UCharsTrieTest::TestGetNextUChars() { | |
| 499 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
| 500 if(trie.isNull()) { | |
| 501 return; // buildTrie() reported an error | |
| 502 } | |
| 503 UnicodeString buffer; | |
| 504 UnicodeStringAppendable app(buffer); | |
| 505 int32_t count=trie->getNextUChars(app); | |
| 506 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { | |
| 507 errln("months getNextUChars()!=[aj] at root"); | |
| 508 } | |
| 509 trie->next(u_j); | |
| 510 trie->next(u_a); | |
| 511 trie->next(u_n); | |
| 512 // getNextUChars() directly after next() | |
| 513 buffer.remove(); | |
| 514 count=trie->getNextUChars(app); | |
| 515 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
| 516 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); | |
| 517 } | |
| 518 // getNextUChars() after getValue() | |
| 519 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. | |
| 520 buffer.remove(); | |
| 521 count=trie->getNextUChars(app); | |
| 522 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { | |
| 523 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getV
alue()"); | |
| 524 } | |
| 525 // getNextUChars() from a linear-match node | |
| 526 trie->next(u_u); | |
| 527 buffer.remove(); | |
| 528 count=trie->getNextUChars(app); | |
| 529 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { | |
| 530 errln("months getNextUChars()!=[a] after \"janu\""); | |
| 531 } | |
| 532 trie->next(u_a); | |
| 533 buffer.remove(); | |
| 534 count=trie->getNextUChars(app); | |
| 535 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { | |
| 536 errln("months getNextUChars()!=[r] after \"janua\""); | |
| 537 } | |
| 538 trie->next(u_r); | |
| 539 trie->next(u_y); | |
| 540 // getNextUChars() after a final match | |
| 541 buffer.remove(); | |
| 542 count=trie->getNextUChars(app); | |
| 543 if(count!=0 || buffer.length()!=0) { | |
| 544 errln("months getNextUChars()!=[] after \"january\""); | |
| 545 } | |
| 546 } | |
| 547 | |
| 548 void UCharsTrieTest::TestIteratorFromBranch() { | |
| 549 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
| 550 if(trie.isNull()) { | |
| 551 return; // buildTrie() reported an error | |
| 552 } | |
| 553 // Go to a branch node. | |
| 554 trie->next(u_j); | |
| 555 trie->next(u_a); | |
| 556 trie->next(u_n); | |
| 557 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); | |
| 558 UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
| 559 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor"))
{ | |
| 560 return; | |
| 561 } | |
| 562 // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
| 563 // following "jan". | |
| 564 static const StringAndValue data[]={ | |
| 565 { "", 1 }, | |
| 566 { ".", 1 }, | |
| 567 { "a", 1 }, | |
| 568 { "bb", 1 }, | |
| 569 { "c", 1 }, | |
| 570 { "ddd", 1 }, | |
| 571 { "ee", 1 }, | |
| 572 { "ef", 1 }, | |
| 573 { "f", 1 }, | |
| 574 { "gg", 1 }, | |
| 575 { "h", 1 }, | |
| 576 { "iiii", 1 }, | |
| 577 { "j", 1 }, | |
| 578 { "kk", 1 }, | |
| 579 { "kl", 1 }, | |
| 580 { "kmm", 1 }, | |
| 581 { "l", 1 }, | |
| 582 { "m", 1 }, | |
| 583 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, | |
| 584 { "o", 1 }, | |
| 585 { "pp", 1 }, | |
| 586 { "qqq", 1 }, | |
| 587 { "r", 1 }, | |
| 588 { "uar", 1 }, | |
| 589 { "uary", 1 } | |
| 590 }; | |
| 591 checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
| 592 // Reset, and we should get the same result. | |
| 593 logln("after iter.reset()"); | |
| 594 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
| 595 } | |
| 596 | |
| 597 void UCharsTrieTest::TestIteratorFromLinearMatch() { | |
| 598 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); | |
| 599 if(trie.isNull()) { | |
| 600 return; // buildTrie() reported an error | |
| 601 } | |
| 602 // Go into a linear-match node. | |
| 603 trie->next(u_j); | |
| 604 trie->next(u_a); | |
| 605 trie->next(u_n); | |
| 606 trie->next(u_u); | |
| 607 trie->next(u_a); | |
| 608 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); | |
| 609 UCharsTrie::Iterator iter(*trie, 0, errorCode); | |
| 610 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor"))
{ | |
| 611 return; | |
| 612 } | |
| 613 // Expected data: Same as in buildMonthsTrie(), except only the suffixes | |
| 614 // following "janua". | |
| 615 static const StringAndValue data[]={ | |
| 616 { "r", 1 }, | |
| 617 { "ry", 1 } | |
| 618 }; | |
| 619 checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
| 620 // Reset, and we should get the same result. | |
| 621 logln("after iter.reset()"); | |
| 622 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
| 623 } | |
| 624 | |
| 625 void UCharsTrieTest::TestTruncatingIteratorFromRoot() { | |
| 626 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); | |
| 627 if(trie.isNull()) { | |
| 628 return; // buildTrie() reported an error | |
| 629 } | |
| 630 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); | |
| 631 UCharsTrie::Iterator iter(*trie, 4, errorCode); | |
| 632 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor"))
{ | |
| 633 return; | |
| 634 } | |
| 635 // Expected data: Same as in buildMonthsTrie(), except only the first 4 char
acters | |
| 636 // of each string, and no string duplicates from the truncation. | |
| 637 static const StringAndValue data[]={ | |
| 638 { "augu", -1 }, | |
| 639 { "jan", 1 }, | |
| 640 { "jan.", 1 }, | |
| 641 { "jana", 1 }, | |
| 642 { "janb", -1 }, | |
| 643 { "janc", 1 }, | |
| 644 { "jand", -1 }, | |
| 645 { "jane", -1 }, | |
| 646 { "janf", 1 }, | |
| 647 { "jang", -1 }, | |
| 648 { "janh", 1 }, | |
| 649 { "jani", -1 }, | |
| 650 { "janj", 1 }, | |
| 651 { "jank", -1 }, | |
| 652 { "janl", 1 }, | |
| 653 { "janm", 1 }, | |
| 654 { "jann", -1 }, | |
| 655 { "jano", 1 }, | |
| 656 { "janp", -1 }, | |
| 657 { "janq", -1 }, | |
| 658 { "janr", 1 }, | |
| 659 { "janu", -1 }, | |
| 660 { "july", 7 }, | |
| 661 { "jun", 6 }, | |
| 662 { "jun.", 6 }, | |
| 663 { "june", 6 } | |
| 664 }; | |
| 665 checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
| 666 // Reset, and we should get the same result. | |
| 667 logln("after iter.reset()"); | |
| 668 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); | |
| 669 } | |
| 670 | |
| 671 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { | |
| 672 static const StringAndValue data[]={ | |
| 673 { "abcdef", 10 }, | |
| 674 { "abcdepq", 200 }, | |
| 675 { "abcdeyz", 3000 } | |
| 676 }; | |
| 677 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTR
IE_BUILD_FAST)); | |
| 678 if(trie.isNull()) { | |
| 679 return; // buildTrie() reported an error | |
| 680 } | |
| 681 // Go into a linear-match node. | |
| 682 trie->next(u_a); | |
| 683 trie->next(u_b); | |
| 684 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShor
t()"); | |
| 685 // Truncate within the linear-match node. | |
| 686 UCharsTrie::Iterator iter(*trie, 2, errorCode); | |
| 687 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor"))
{ | |
| 688 return; | |
| 689 } | |
| 690 static const StringAndValue expected[]={ | |
| 691 { "cd", -1 } | |
| 692 }; | |
| 693 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); | |
| 694 // Reset, and we should get the same result. | |
| 695 logln("after iter.reset()"); | |
| 696 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); | |
| 697 } | |
| 698 | |
| 699 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { | |
| 700 static const StringAndValue data[]={ | |
| 701 { "abcdef", 10 }, | |
| 702 { "abcdepq", 200 }, | |
| 703 { "abcdeyz", 3000 } | |
| 704 }; | |
| 705 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTR
IE_BUILD_FAST)); | |
| 706 if(trie.isNull()) { | |
| 707 return; // buildTrie() reported an error | |
| 708 } | |
| 709 // Go into a linear-match node. | |
| 710 trie->next(u_a); | |
| 711 trie->next(u_b); | |
| 712 trie->next(u_c); | |
| 713 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong
()"); | |
| 714 // Truncate after the linear-match node. | |
| 715 UCharsTrie::Iterator iter(*trie, 3, errorCode); | |
| 716 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor"))
{ | |
| 717 return; | |
| 718 } | |
| 719 static const StringAndValue expected[]={ | |
| 720 { "def", 10 }, | |
| 721 { "dep", -1 }, | |
| 722 { "dey", -1 } | |
| 723 }; | |
| 724 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); | |
| 725 // Reset, and we should get the same result. | |
| 726 logln("after iter.reset()"); | |
| 727 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); | |
| 728 } | |
| 729 | |
| 730 void UCharsTrieTest::TestIteratorFromUChars() { | |
| 731 static const StringAndValue data[]={ | |
| 732 { "mm", 3 }, | |
| 733 { "mmm", 33 }, | |
| 734 { "mmnop", 333 } | |
| 735 }; | |
| 736 builder_->clear(); | |
| 737 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); | |
| 738 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { | |
| 739 builder_->add(data[i].s, data[i].value, errorCode); | |
| 740 } | |
| 741 UnicodeString trieUChars; | |
| 742 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); | |
| 743 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); | |
| 744 checkIterator(iter, data, UPRV_LENGTHOF(data)); | |
| 745 } | |
| 746 | |
| 747 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength)
{ | |
| 748 logln("checkData(dataLength=%d, fast)", (int)dataLength); | |
| 749 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); | |
| 750 logln("checkData(dataLength=%d, small)", (int)dataLength); | |
| 751 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); | |
| 752 } | |
| 753 | |
| 754 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength,
UStringTrieBuildOption buildOption) { | |
| 755 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); | |
| 756 if(trie.isNull()) { | |
| 757 return; // buildTrie() reported an error | |
| 758 } | |
| 759 checkFirst(*trie, data, dataLength); | |
| 760 checkNext(*trie, data, dataLength); | |
| 761 checkNextWithState(*trie, data, dataLength); | |
| 762 checkNextString(*trie, data, dataLength); | |
| 763 checkIterator(*trie, data, dataLength); | |
| 764 } | |
| 765 | |
| 766 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataL
ength, | |
| 767 UStringTrieBuildOption buildOption) { | |
| 768 IcuTestErrorCode errorCode(*this, "buildTrie()"); | |
| 769 // Add the items to the trie builder in an interesting (not trivial, not ran
dom) order. | |
| 770 int32_t index, step; | |
| 771 if(dataLength&1) { | |
| 772 // Odd number of items. | |
| 773 index=dataLength/2; | |
| 774 step=2; | |
| 775 } else if((dataLength%3)!=0) { | |
| 776 // Not a multiple of 3. | |
| 777 index=dataLength/5; | |
| 778 step=3; | |
| 779 } else { | |
| 780 index=dataLength-1; | |
| 781 step=-1; | |
| 782 } | |
| 783 builder_->clear(); | |
| 784 for(int32_t i=0; i<dataLength; ++i) { | |
| 785 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), | |
| 786 data[index].value, errorCode); | |
| 787 index=(index+step)%dataLength; | |
| 788 } | |
| 789 UnicodeString trieUChars; | |
| 790 builder_->buildUnicodeString(buildOption, trieUChars, errorCode); | |
| 791 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); | |
| 792 if(!errorCode.logIfFailureAndReset("add()/build()")) { | |
| 793 builder_->add("zzz", 999, errorCode); | |
| 794 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { | |
| 795 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); | |
| 796 } | |
| 797 } | |
| 798 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); | |
| 799 UnicodeString trieUChars2; | |
| 800 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); | |
| 801 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { | |
| 802 errln("builder.buildUnicodeString() before & after build() returned same
array"); | |
| 803 } | |
| 804 if(errorCode.isFailure()) { | |
| 805 return NULL; | |
| 806 } | |
| 807 // Tries from either build() method should be identical but | |
| 808 // UCharsTrie does not implement equals(). | |
| 809 // We just return either one. | |
| 810 if((dataLength&1)!=0) { | |
| 811 return trie.orphan(); | |
| 812 } else { | |
| 813 return new UCharsTrie(trieUChars2.getBuffer()); | |
| 814 } | |
| 815 } | |
| 816 | |
| 817 void UCharsTrieTest::checkFirst(UCharsTrie &trie, | |
| 818 const StringAndValue data[], int32_t dataLength)
{ | |
| 819 for(int32_t i=0; i<dataLength; ++i) { | |
| 820 if(*data[i].s==0) { | |
| 821 continue; // skip empty string | |
| 822 } | |
| 823 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unesca
pe(); | |
| 824 UChar32 c=expectedString[0]; | |
| 825 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; | |
| 826 UStringTrieResult firstResult=trie.first(c); | |
| 827 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue()
: -1; | |
| 828 UStringTrieResult nextResult=trie.next(nextCp); | |
| 829 if(firstResult!=trie.reset().next(c) || | |
| 830 firstResult!=trie.current() || | |
| 831 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -
1) || | |
| 832 nextResult!=trie.next(nextCp) | |
| 833 ) { | |
| 834 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", | |
| 835 c, data[i].s); | |
| 836 } | |
| 837 c=expectedString.char32At(0); | |
| 838 int32_t cLength=U16_LENGTH(c); | |
| 839 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength
) : 0; | |
| 840 firstResult=trie.firstForCodePoint(c); | |
| 841 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; | |
| 842 nextResult=trie.nextForCodePoint(nextCp); | |
| 843 if(firstResult!=trie.reset().nextForCodePoint(c) || | |
| 844 firstResult!=trie.current() || | |
| 845 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -
1) || | |
| 846 nextResult!=trie.nextForCodePoint(nextCp) | |
| 847 ) { | |
| 848 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint
(same) for %s", | |
| 849 c, data[i].s); | |
| 850 } | |
| 851 } | |
| 852 trie.reset(); | |
| 853 } | |
| 854 | |
| 855 void UCharsTrieTest::checkNext(UCharsTrie &trie, | |
| 856 const StringAndValue data[], int32_t dataLength)
{ | |
| 857 UCharsTrie::State state; | |
| 858 for(int32_t i=0; i<dataLength; ++i) { | |
| 859 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unesca
pe(); | |
| 860 int32_t stringLength= (i&1) ? -1 : expectedString.length(); | |
| 861 UStringTrieResult result; | |
| 862 if( !USTRINGTRIE_HAS_VALUE( | |
| 863 result=trie.next(expectedString.getTerminatedBuffer(), stringLen
gth)) || | |
| 864 result!=trie.current() | |
| 865 ) { | |
| 866 errln("trie does not seem to contain %s", data[i].s); | |
| 867 } else if(trie.getValue()!=data[i].value) { | |
| 868 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx"
, | |
| 869 data[i].s, | |
| 870 (long)trie.getValue(), (long)trie.getValue(), | |
| 871 (long)data[i].value, (long)data[i].value); | |
| 872 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { | |
| 873 errln("trie value for %s changes when repeating current()/getValue()
", data[i].s); | |
| 874 } | |
| 875 trie.reset(); | |
| 876 stringLength=expectedString.length(); | |
| 877 result=trie.current(); | |
| 878 for(int32_t j=0; j<stringLength; ++j) { | |
| 879 if(!USTRINGTRIE_HAS_NEXT(result)) { | |
| 880 errln("trie.current()!=hasNext before end of %s (at index %d)",
data[i].s, j); | |
| 881 break; | |
| 882 } | |
| 883 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { | |
| 884 trie.getValue(); | |
| 885 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { | |
| 886 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_V
ALUE before end of %s (at index %d)", data[i].s, j); | |
| 887 break; | |
| 888 } | |
| 889 } | |
| 890 result=trie.next(expectedString[j]); | |
| 891 if(!USTRINGTRIE_MATCHES(result)) { | |
| 892 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at ind
ex %d)", data[i].s, j); | |
| 893 break; | |
| 894 } | |
| 895 if(result!=trie.current()) { | |
| 896 errln("trie.next()!=following current() before end of %s (at ind
ex %d)", data[i].s, j); | |
| 897 break; | |
| 898 } | |
| 899 } | |
| 900 if(!USTRINGTRIE_HAS_VALUE(result)) { | |
| 901 errln("trie.next()!=hasValue at the end of %s", data[i].s); | |
| 902 continue; | |
| 903 } | |
| 904 trie.getValue(); | |
| 905 if(result!=trie.current()) { | |
| 906 errln("trie.current() != current()+getValue()+current() after end of
%s", | |
| 907 data[i].s); | |
| 908 } | |
| 909 // Compare the final current() with whether next() can actually continue
. | |
| 910 trie.saveState(state); | |
| 911 UBool nextContinues=FALSE; | |
| 912 for(int32_t c=0x20; c<0xe000; ++c) { | |
| 913 if(c==0x80) { | |
| 914 c=0xd800; // Check for ASCII and surrogates but not all of the
BMP. | |
| 915 } | |
| 916 if(trie.resetToState(state).next(c)) { | |
| 917 nextContinues=TRUE; | |
| 918 break; | |
| 919 } | |
| 920 } | |
| 921 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { | |
| 922 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts
" | |
| 923 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s
", data[i].s); | |
| 924 } | |
| 925 trie.reset(); | |
| 926 } | |
| 927 } | |
| 928 | |
| 929 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, | |
| 930 const StringAndValue data[], int32_t dat
aLength) { | |
| 931 UCharsTrie::State noState, state; | |
| 932 for(int32_t i=0; i<dataLength; ++i) { | |
| 933 if((i&1)==0) { | |
| 934 // This should have no effect. | |
| 935 trie.resetToState(noState); | |
| 936 } | |
| 937 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unesca
pe(); | |
| 938 int32_t stringLength=expectedString.length(); | |
| 939 int32_t partialLength=stringLength/3; | |
| 940 for(int32_t j=0; j<partialLength; ++j) { | |
| 941 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { | |
| 942 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", dat
a[i].s); | |
| 943 return; | |
| 944 } | |
| 945 } | |
| 946 trie.saveState(state); | |
| 947 UStringTrieResult resultAtState=trie.current(); | |
| 948 UStringTrieResult result; | |
| 949 int32_t valueAtState=-99; | |
| 950 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { | |
| 951 valueAtState=trie.getValue(); | |
| 952 } | |
| 953 result=trie.next(0); // mismatch | |
| 954 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { | |
| 955 errln("trie.next(0) matched after part of %s", data[i].s); | |
| 956 } | |
| 957 if( resultAtState!=trie.resetToState(state).current() || | |
| 958 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue
()) | |
| 959 ) { | |
| 960 errln("trie.next(part of %s) changes current()/getValue() after " | |
| 961 "saveState/next(0)/resetToState", | |
| 962 data[i].s); | |
| 963 } else if(!USTRINGTRIE_HAS_VALUE( | |
| 964 result=trie.next(expectedString.getTerminatedBuffer()+part
ialLength, | |
| 965 stringLength-partialLength)) || | |
| 966 result!=trie.current()) { | |
| 967 errln("trie.next(rest of %s) does not seem to contain %s after " | |
| 968 "saveState/next(0)/resetToState", | |
| 969 data[i].s, data[i].s); | |
| 970 } else if(!USTRINGTRIE_HAS_VALUE( | |
| 971 result=trie.resetToState(state). | |
| 972 next(expectedString.getTerminatedBuffer()+part
ialLength, | |
| 973 stringLength-partialLength)) || | |
| 974 result!=trie.current()) { | |
| 975 errln("trie does not seem to contain %s after saveState/next(rest)/r
esetToState", | |
| 976 data[i].s); | |
| 977 } else if(trie.getValue()!=data[i].value) { | |
| 978 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx"
, | |
| 979 data[i].s, | |
| 980 (long)trie.getValue(), (long)trie.getValue(), | |
| 981 (long)data[i].value, (long)data[i].value); | |
| 982 } | |
| 983 trie.reset(); | |
| 984 } | |
| 985 } | |
| 986 | |
| 987 // next(string) is also tested in other functions, | |
| 988 // but here we try to go partway through the string, and then beyond it. | |
| 989 void UCharsTrieTest::checkNextString(UCharsTrie &trie, | |
| 990 const StringAndValue data[], int32_t dataLe
ngth) { | |
| 991 for(int32_t i=0; i<dataLength; ++i) { | |
| 992 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unesca
pe(); | |
| 993 int32_t stringLength=expectedString.length(); | |
| 994 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { | |
| 995 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s
", data[i].s); | |
| 996 continue; | |
| 997 } | |
| 998 // Test that we stop properly at the end of the string. | |
| 999 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, | |
| 1000 stringLength+1-stringLength/2)) { | |
| 1001 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].
s); | |
| 1002 } | |
| 1003 trie.reset(); | |
| 1004 } | |
| 1005 } | |
| 1006 | |
| 1007 void UCharsTrieTest::checkIterator(UCharsTrie &trie, | |
| 1008 const StringAndValue data[], int32_t dataLeng
th) { | |
| 1009 IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
| 1010 UCharsTrie::Iterator iter(trie, 0, errorCode); | |
| 1011 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) construc
tor")) { | |
| 1012 return; | |
| 1013 } | |
| 1014 checkIterator(iter, data, dataLength); | |
| 1015 } | |
| 1016 | |
| 1017 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, | |
| 1018 const StringAndValue data[], int32_t dataLeng
th) { | |
| 1019 IcuTestErrorCode errorCode(*this, "checkIterator()"); | |
| 1020 for(int32_t i=0; i<dataLength; ++i) { | |
| 1021 if(!iter.hasNext()) { | |
| 1022 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[
i].s); | |
| 1023 break; | |
| 1024 } | |
| 1025 UBool hasNext=iter.next(errorCode); | |
| 1026 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s"
, (int)i, data[i].s)) { | |
| 1027 break; | |
| 1028 } | |
| 1029 if(!hasNext) { | |
| 1030 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].
s); | |
| 1031 break; | |
| 1032 } | |
| 1033 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unesca
pe(); | |
| 1034 if(iter.getString()!=expectedString) { | |
| 1035 char buffer[1000]; | |
| 1036 UnicodeString invString(prettify(iter.getString())); | |
| 1037 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffe
r), US_INV); | |
| 1038 errln("trie iterator next().getString()=%s but expected %s for item
%d", | |
| 1039 buffer, data[i].s, (int)i); | |
| 1040 } | |
| 1041 if(iter.getValue()!=data[i].value) { | |
| 1042 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x
%lx for item %d: %s", | |
| 1043 (long)iter.getValue(), (long)iter.getValue(), | |
| 1044 (long)data[i].value, (long)data[i].value, | |
| 1045 (int)i, data[i].s); | |
| 1046 } | |
| 1047 } | |
| 1048 if(iter.hasNext()) { | |
| 1049 errln("trie iterator hasNext()=TRUE after all items"); | |
| 1050 } | |
| 1051 UBool hasNext=iter.next(errorCode); | |
| 1052 errorCode.logIfFailureAndReset("trie iterator next() after all items"); | |
| 1053 if(hasNext) { | |
| 1054 errln("trie iterator next()=TRUE after all items"); | |
| 1055 } | |
| 1056 } | |
| OLD | NEW |