source/test/intltest/itspoof.cpp - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/intltest/itspoof.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 **********************************************************************

3 * Copyright (C) 2011-2015, International Business Machines Corporation

4 * and others. All Rights Reserved.

5 **********************************************************************

6 */

7 /**

8 * IntlTestSpoof tests for USpoofDetector

9 */

10

11 #include "unicode/utypes.h"

12

13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO

14

15 #include "itspoof.h"

16

17 #include "unicode/normlzr.h"

18 #include "unicode/regex.h"

19 #include "unicode/unistr.h"

20 #include "unicode/uscript.h"

21 #include "unicode/uspoof.h"

22

23 #include "cstring.h"

24 #include "identifier_info.h"

25 #include "scriptset.h"

26 #include "uhash.h"

27

28 #include <stdlib.h>

29 #include <stdio.h>

30

//
// Assertion helpers for the tests in this file. Each one reports a failure
// (with file and line) through the test framework's logging functions and
// then lets the test continue.
//
// Note: TEST_ASSERT_EQ and TEST_ASSERT_NE expand their operands once in the
//       comparison and again in the failure message, so an operand with side
//       effects is evaluated a second time when the assertion fails. Both
//       operands are printed with %d.
//

// Report (via errcheckln) when an ICU UErrorCode indicates failure.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    errcheckln(status, "Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Report (via errln) when expr evaluates to FALSE.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.", __FILE__, __LINE__, #expr);};}

// Like TEST_ASSERT, but reports through dataerrln and includes the caller's msg.
#define TEST_ASSERT_MSG(expr, msg) {if ((expr)==FALSE) { \
    dataerrln("Test Failure at file %s, line %d, %s: \"%s\" is false.", __FILE__, __LINE__, msg, #expr);};}

// Report when a != b, printing both expressions and their values.
#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
    errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d)", \
             __FILE__, __LINE__, #a, (a), #b, (b)); }}

// Report when a == b, printing both expressions and their values.
#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
    errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d)", \
            __FILE__, __LINE__, #a, (a), #b, (b)); }}

47

/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up test case.
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "sc" is the ready-to-go SpoofChecker for use in the tests.
 *
 *   TEST_SETUP opens a scope, declares "status" and "sc", opens the checker,
 *   and opens an inner block that only runs when the open succeeded.
 *   TEST_TEARDOWN closes that inner block, re-checks "status", closes the
 *   checker and closes the outer scope. The two must always be used as a pair.
 */
#define TEST_SETUP {  \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc;     \
    sc = uspoof_open(&status);  \
    TEST_ASSERT_SUCCESS(status);   \
    if (U_SUCCESS(status)){

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uspoof_close(sc);  \
}

66

67

68

69

70 void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name , char* /par/ )

71 {

72 if (exec) logln("TestSuite spoof: ");

73 switch (index) {

74 case 0:

75 name = "TestSpoofAPI";

76 if (exec) {

77 testSpoofAPI();

78 }

79 break;

80 case 1:

81 name = "TestSkeleton";

82 if (exec) {

83 testSkeleton();

84 }

85 break;

86 case 2:

87 name = "TestAreConfusable";

88 if (exec) {

89 testAreConfusable();

90 }

91 break;

92 case 3:

93 name = "TestInvisible";

94 if (exec) {

95 testInvisible();

96 }

97 break;

98 case 4:

99 name = "testConfData";

100 if (exec) {

101 testConfData();

102 }

103 break;

104 case 5:

105 name = "testBug8654";

106 if (exec) {

107 testBug8654();

108 }

109 break;

110 case 6:

111 name = "testIdentifierInfo";

112 if (exec) {

113 testIdentifierInfo();

114 }

115 break;

116 case 7:

117 name = "testScriptSet";

118 if (exec) {

119 testScriptSet();

120 }

121 break;

122 case 8:

123 name = "testRestrictionLevel";

124 if (exec) {

125 testRestrictionLevel();

126 }

127 break;

128 case 9:

129 name = "testMixedNumbers";

130 if (exec) {

131 testMixedNumbers();

132 }

133 break;

134

135

136 default: name=""; break;

137 }

138 }

139

140 void IntlTestSpoof::testSpoofAPI() {

141

142 TEST_SETUP

143 UnicodeString s("xyz"); // Many latin ranges are whole-script confusabl e with other scripts.

144 // If this test starts failing, consult confusa blesWholeScript.txt

145 int32_t position = 666;

146 int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &stat us);

147 TEST_ASSERT_SUCCESS(status);

148 TEST_ASSERT_EQ(0, checkResults);

149 TEST_ASSERT_EQ(0, position);

150 TEST_TEARDOWN;

151

152 TEST_SETUP

153 UnicodeString s1("cxs");

154 UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape(); / / Cyrillic "cxs"

155 int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &st atus);

156 TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE \| USPOOF_WHOLE_SCRIPT_CONF USABLE, checkResults);

157

158 TEST_TEARDOWN;

159

160 TEST_SETUP

161 UnicodeString s("I1l0O");

162 UnicodeString dest;

163 UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_C ASE, s, dest, &status);

164 TEST_ASSERT_SUCCESS(status);

165 TEST_ASSERT(UnicodeString("lllOO") == dest);

166 TEST_ASSERT(&dest == &retStr);

167 TEST_TEARDOWN;

168 }

169

170

// Forward one skeleton test case to checkSkeleton(), passing the line number
// of the test case so a failure can be traced back to it.
// Expects a USpoofChecker named "sc" to be in scope at the point of use.
#define CHECK_SKELETON(type, input, expected) { \
    checkSkeleton(sc, type, input, expected, __LINE__); \
    }

174

175

176 // testSkeleton. Spot check a number of confusable skeleton substitutions from the

177 // Unicode data file confusables.txt

178 // Test cases chosen for substitutions of various lengths, and

179 // membership in different mapping tables.

180 // Note: for ICU 55, all tables collapsed to the MA table data.

181 // TODO: for ICU 56 with Unicode 8, revisit this test.

182 //

183 void IntlTestSpoof::testSkeleton() {

184 const uint32_t ML = 0;

185 const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;

186 const uint32_t MA = USPOOF_ANY_CASE;

187 const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_ANY_CASE;

188

189 TEST_SETUP

190 CHECK_SKELETON(SL, "nochange", "nochange");

191 CHECK_SKELETON(SA, "nochange", "nochange");

192 CHECK_SKELETON(ML, "nochange", "nochange");

193 CHECK_SKELETON(MA, "nochange", "nochange");

194 CHECK_SKELETON(MA, "love", "love");

195 CHECK_SKELETON(MA, "1ove", "love"); // Digit 1 to letter l

196 CHECK_SKELETON(ML, "OOPS", "OOPS");

197 CHECK_SKELETON(ML, "00PS", "OOPS");

198 CHECK_SKELETON(MA, "OOPS", "OOPS");

199 CHECK_SKELETON(MA, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only

200 CHECK_SKELETON(SL, "\\u059c", "\\u0301");

201 CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");

202 CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll) "

203 CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006 c\\u0644\\u006f");

204

205 // This mapping exists in the ML and MA tables, does not exist in SL, SA

206 // 0C83 ; 0983 ; ML

207 // 0C83 ; 0983 ; MA

208 //

209

210 CHECK_SKELETON(SL, "\\u0C83", "\\u0983");

211 CHECK_SKELETON(SA, "\\u0C83", "\\u0983");

212 CHECK_SKELETON(ML, "\\u0C83", "\\u0983");

213 CHECK_SKELETON(MA, "\\u0C83", "\\u0983");

214

215 // 0391 mappings exist only in MA and SA tables.

216 CHECK_SKELETON(MA, "\\u0391", "A");

217 CHECK_SKELETON(SA, "\\u0391", "A");

218 CHECK_SKELETON(ML, "\\u0391", "A");

219 CHECK_SKELETON(SL, "\\u0391", "A");

220

221 // 13CF Mappings in all four tables, different in MA.

222 CHECK_SKELETON(ML, "\\u13CF", "b");

223 CHECK_SKELETON(MA, "\\u13CF", "b");

224 CHECK_SKELETON(SL, "\\u13CF", "b");

225 CHECK_SKELETON(SA, "\\u13CF", "b");

226

227 // 0022 ; 0027 0027 ;

228 // all tables.

229 CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");

230 CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");

231 CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");

232 CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");

233

234 // 017F mappings exist only in MA and SA tables.

235 CHECK_SKELETON(MA, "\\u017F", "f");

236 CHECK_SKELETON(SA, "\\u017F", "f");

237 CHECK_SKELETON(ML, "\\u017F", "f");

238 CHECK_SKELETON(SL, "\\u017F", "f");

239

240 TEST_TEARDOWN;

241 }

242

243

244 //

245 // Run a single confusable skeleton transformation test case.

246 //

247 void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,

248 const char input, const char expected, int32 _t lineNum) {

249 UnicodeString uInput = UnicodeString(input).unescape();

250 UnicodeString uExpected = UnicodeString(expected).unescape();

251

252 UErrorCode status = U_ZERO_ERROR;

253 UnicodeString actual;

254 uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);

255 if (U_FAILURE(status)) {

256 errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__ , __LINE__, lineNum,

257 u_errorName(status));

258 return;

259 }

260 if (uExpected != actual) {

261 errln("File %s, Line %d, Test case from line %d, Actual and Expected ske letons differ.",

262 __FILE__, __LINE__, lineNum);

263 errln(UnicodeString(" Actual Skeleton: \"") + actual + UnicodeString(" \"\n") +

264 UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeStrin g("\""));

265 }

266 }

267

268 void IntlTestSpoof::testAreConfusable() {

269 TEST_SETUP

270 UnicodeString s1("A long string that will overflow stack buffers. A lon g string that will overflow stack buffers. "

271 "A long string that will overflow stack buffers. A lon g string that will overflow stack buffers. ");

272 UnicodeString s2("A long string that wi11 overflow stack buffers. A lon g string that will overflow stack buffers. "

273 "A long string that wi11 overflow stack buffers. A lon g string that will overflow stack buffers. ");

274 TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnic odeString(sc, s1, s2, &status));

275 TEST_ASSERT_SUCCESS(status);

276

277 TEST_TEARDOWN;

278 }

279

280 void IntlTestSpoof::testInvisible() {

281 TEST_SETUP

282 UnicodeString s = UnicodeString("abcd\\u0301ef").unescape();

283 int32_t position = -42;

284 TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));

285 TEST_ASSERT_SUCCESS(status);

286 TEST_ASSERT(0 == position);

287

288 UnicodeString s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescap e();

289 TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &posi tion, &status));

290 TEST_ASSERT_SUCCESS(status);

291 TEST_ASSERT_EQ(0, position);

292

293 // Two acute accents, one from the composed a with acute accent, \u00e1,

294 // and one separate.

295 position = -42;

296 UnicodeString s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();

297 TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &posi tion, &status));

298 TEST_ASSERT_SUCCESS(status);

299 TEST_ASSERT_EQ(0, position);

300 TEST_TEARDOWN;

301 }

302

303 void IntlTestSpoof::testBug8654() {

304 TEST_SETUP

305 UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();

306 int32_t position = -42;

307 TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &posit ion, &status) & USPOOF_INVISIBLE );

308 TEST_ASSERT_SUCCESS(status);

309 TEST_ASSERT_EQ(0, position);

310 TEST_TEARDOWN;

311 }

312

313 static UnicodeString parseHex(const UnicodeString &in) {

314 // Convert a series of hex numbers in a Unicode String to a string with the

315 // corresponding characters.

316 // The conversion is _really_ annoying. There must be some function to just do it.

317 UnicodeString result;

318 UChar32 cc = 0;

319 for (int32_t i=0; i<in.length(); i++) {

320 UChar c = in.charAt(i);

321 if (c == 0x20) { // Space

322 if (cc > 0) {

323 result.append(cc);

324 cc = 0;

325 }

326 } else if (c>=0x30 && c<=0x39) {

327 cc = (cc<<4) + (c - 0x30);

328 } else if ((c>=0x41 && c<=0x46) \|\| (c>=0x61 && c<=0x66)) {

329 cc = (cc<<4) + (c & 0x0f)+9;

330 }

331 // else do something with bad input.

332 }

333 if (cc > 0) {

334 result.append(cc);

335 }

336 return result;

337 }

338

339

340 //

341 // Append the hex form of a UChar32 to a UnicodeString.

342 // Used in formatting error messages.

343 // Match the formatting of numbers in confusables.txt

344 // Minimum of 4 digits, no leading zeroes for positions 5 and up.

345 //

346 static void appendHexUChar(UnicodeString &dest, UChar32 c) {

347 UBool doZeroes = FALSE;

348 for (int bitNum=28; bitNum>=0; bitNum-=4) {

349 if (bitNum <= 12) {

350 doZeroes = TRUE;

351 }

352 int hexDigit = (c>>bitNum) & 0x0f;

353 if (hexDigit != 0 \|\| doZeroes) {

354 doZeroes = TRUE;

355 dest.append((UChar)(hexDigit<=9? hexDigit + 0x30: hexDigit -10 + 0x4 1));

356 }

357 }

358 dest.append((UChar)0x20);

359 }

360

361 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);

362

363 // testConfData - Check each data item from the Unicode confusables.txt file,

364 // verify that it transforms correctly in a skeleton.

365 //

366 void IntlTestSpoof::testConfData() {

367 char buffer[2000];

368 if (getUnidataPath(buffer) == NULL) {

369 errln("Skipping test spoof/testConfData. Unable to find path to source/d ata/unidata/.");

370 return;

371 }

372 uprv_strcat(buffer, "confusables.txt");

373

374 LocalStdioFilePointer f(fopen(buffer, "rb"));

375 if (f.isNull()) {

376 errln("Skipping test spoof/testConfData. File confusables.txt not acces sible.");

377 return;

378 }

379 fseek(f.getAlias(), 0, SEEK_END);

380 int32_t fileSize = ftell(f.getAlias());

381 LocalArray<char> fileBuf(new char[fileSize]);

382 fseek(f.getAlias(), 0, SEEK_SET);

383 int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());

384 TEST_ASSERT_EQ(amt_read, fileSize);

385 TEST_ASSERT(fileSize>0);

386 if (amt_read != fileSize \|\| fileSize <=0) {

387 return;

388 }

389 UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.g etAlias(), fileSize));

390

391 UErrorCode status = U_ZERO_ERROR;

392 LocalUSpoofCheckerPointer sc(uspoof_open(&status));

393 TEST_ASSERT_SUCCESS(status);

394

395 // Parse lines from the confusables.txt file. Example Line:

396 // FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....

397 // Three fields. The hex fields can contain more than one character,

398 // and each character may be more than 4 digits (for supplemn tals)

399 // This regular expression matches lines and splits the fields into capture groups.

400 RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]?);([^#;]?);([^#]*)", confus ablesTxt, 0, status);

401 TEST_ASSERT_SUCCESS(status);

402 while (parseLine.find()) {

403 UnicodeString from = parseHex(parseLine.group(1, status));

404 if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {

405 // The source character was not NFD.

406 // Skip this case; the first step in obtaining a skeleton is to NFD the input,

407 // so the mapping in this line of confusables.txt will never be app lied.

408 continue;

409 }

410

411 UnicodeString rawExpected = parseHex(parseLine.group(2, status));

412 UnicodeString expected;

413 Normalizer::decompose(rawExpected, FALSE /NFD/, 0, expected, status);

414 TEST_ASSERT_SUCCESS(status);

415

416 int32_t skeletonType = 0;

417 UnicodeString tableType = parseLine.group(3, status);

418 TEST_ASSERT_SUCCESS(status);

419 if (tableType.indexOf("SL") >= 0) {

420 skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;

421 } else if (tableType.indexOf("SA") >= 0) {

422 skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE \| USPOOF_ANY_CASE;

423 } else if (tableType.indexOf("ML") >= 0) {

424 skeletonType = 0;

425 } else if (tableType.indexOf("MA") >= 0) {

426 skeletonType = USPOOF_ANY_CASE;

427 }

428

429 UnicodeString actual;

430 uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actua l, &status);

431 TEST_ASSERT_SUCCESS(status);

432 TEST_ASSERT(actual == expected);

433 if (actual != expected) {

434 errln(parseLine.group(0, status));

435 UnicodeString line = "Actual: ";

436 int i = 0;

437 while (i < actual.length()) {

438 appendHexUChar(line, actual.char32At(i));

439 i = actual.moveIndex32(i, 1);

440 }

441 errln(line);

442 }

443 if (U_FAILURE(status)) {

444 break;

445 }

446 }

447 }

448

449 // testIdentifierInfo. Note that IdentifierInfo is not public ICU API at this ti me

450 void IntlTestSpoof::testIdentifierInfo() {

451 UErrorCode status = U_ZERO_ERROR;

452 ScriptSet bitset12; bitset12.set(USCRIPT_LATIN, status).set(USCRIPT_HANGUL, status);

453 ScriptSet bitset2; bitset2.set(USCRIPT_HANGUL, status);

454 TEST_ASSERT(bitset12.contains(bitset2));

455 TEST_ASSERT(bitset12.contains(bitset12));

456 TEST_ASSERT(!bitset2.contains(bitset12));

457

458 ScriptSet arabSet; arabSet.set(USCRIPT_ARABIC, status);

459 ScriptSet latinSet; latinSet.set(USCRIPT_LATIN, status);

460 UElement arabEl; arabEl.pointer = &arabSet;

461 UElement latinEl; latinEl.pointer = &latinSet;

462 TEST_ASSERT(uhash_compareScriptSet(arabEl, latinEl) < 0);

463 TEST_ASSERT(uhash_compareScriptSet(latinEl, arabEl) > 0);

464

465 UnicodeString scriptString;

466 bitset12.displayScripts(scriptString);

467 TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang Latn") == scriptString);

468

469 status = U_ZERO_ERROR;

470 UHashtable *alternates = uhash_open(uhash_hashScriptSet ,uhash_compareScript Set, NULL, &status);

471 uhash_puti(alternates, &bitset12, 1, &status);

472 uhash_puti(alternates, &bitset2, 1, &status);

473 UnicodeString alternatesString;

474 IdentifierInfo::displayAlternates(alternatesString, alternates, status);

475 TEST_ASSERT(UNICODE_STRING_SIMPLE("Hang; Hang Latn") == alternatesString);

476 TEST_ASSERT_SUCCESS(status);

477

478 status = U_ZERO_ERROR;

479 ScriptSet tScriptSet;

480 tScriptSet.parseScripts(scriptString, status);

481 TEST_ASSERT_SUCCESS(status);

482 TEST_ASSERT(bitset12 == tScriptSet);

483 UnicodeString ss;

484 ss.remove();

485 uhash_close(alternates);

486

487 struct Test {

488 const char *fTestString;

489 URestrictionLevel fRestrictionLevel;

490 const char *fNumerics;

491 const char *fScripts;

492 const char *fAlternates;

493 const char *fCommonAlternates;

494 } tests[] = {

495 {"\\u0061\\u2665", USPOOF_UNRESTRICTIVE, "[]", " Latn", "", ""},

496 {"\\u0061\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", " Latn", "Hani Hira Kana", "Hani Hira Kana"},

497 {"\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", " Latn", "Hira Kana", "Hira Kana"},

498 {"\\u0061\\u30FC\\u3006\\u30A2", USPOOF_HIGHLY_RESTRICTIVE, "[]", " Latn Kana", "", ""},

499 {"\\u30A2\\u0061\\u30FC\\u3006", USPOOF_HIGHLY_RESTRICTIVE, "[]", " Latn Kana", "", ""},

500 {"\\u0061\\u0031\\u0661", USPOOF_UNRESTRICTIVE, "[\\u00 30\\u0660]", "Latn", "Arab Thaa", "Arab Thaa"},

501 {"\\u0061\\u0031\\u0661\\u06F1", USPOOF_UNRESTRICTIVE, "[\\u00 30\\u0660\\u06F0]", "Latn Arab", "", ""},

502 {"\\u0661\\u30FC\\u3006\\u0061\\u30A2\\u0031\\u0967\\u06F1", USPOOF _UNRESTRICTIVE,

503 "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},

504 {"\\u0061\\u30A2\\u30FC\\u3006\\u0031\\u0967\\u0661\\u06F1", USPOOF _UNRESTRICTIVE,

505 "[\\u0030\\u0660\\u06F0\\u0966]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"}

506 };

507

508 int testNum;

509 for (testNum = 0; testNum < UPRV_LENGTHOF(tests); testNum++) {

510 char testNumStr[40];

511 sprintf(testNumStr, "testNum = %d", testNum);

512 Test &test = tests[testNum];

513 status = U_ZERO_ERROR;

514 UnicodeString testString(test.fTestString); // Note: may do charset con version.

515 testString = testString.unescape();

516 IdentifierInfo idInfo(status);

517 TEST_ASSERT_SUCCESS(status);

518 idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));

519 idInfo.setIdentifier(testString, status);

520 TEST_ASSERT_MSG(*idInfo.getIdentifier() == testString, testNumStr);

521

522 URestrictionLevel restrictionLevel = test.fRestrictionLevel;

523 TEST_ASSERT_MSG(restrictionLevel == idInfo.getRestrictionLevel(status), testNumStr);

524

525 status = U_ZERO_ERROR;

526 UnicodeSet numerics(UnicodeString(test.fNumerics).unescape(), status);

527 TEST_ASSERT_SUCCESS(status);

528 TEST_ASSERT_MSG(numerics == *idInfo.getNumerics(), testNumStr);

529

530 ScriptSet scripts;

531 scripts.parseScripts(UnicodeString(test.fScripts), status);

532 TEST_ASSERT_MSG(scripts == *idInfo.getScripts(), testNumStr);

533

534 UnicodeString alternatesStr;

535 IdentifierInfo::displayAlternates(alternatesStr, idInfo.getAlternates(), status);

536 TEST_ASSERT_MSG(UnicodeString(test.fAlternates) == alternatesStr, testNu mStr);

537

538 ScriptSet commonAlternates;

539 commonAlternates.parseScripts(UnicodeString(test.fCommonAlternates), sta tus);

540 TEST_ASSERT_MSG(commonAlternates == *idInfo.getCommonAmongAlternates(), testNumStr);

541 }

542

543 // Test of getScriptCount()

544 // Script and or Script Extension for chars used in the tests

545 // \\u3013 ; Bopo Hang Hani Hira Kana # So GETA MARK

546 // \\uA838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK

547 // \\u0951 ; Deva Latn # Mn DEVANAGARI STRESS SIGN UDATTA

548 //

549 // \\u0370 ; Greek # L GREEK CAPITAL LETTER H ETA

550 // \\u0481 ; Cyrillic # L& CYRILLIC SMALL LETTER KOPPA

551 // \\u0904 ; Devanagari # Lo DEVANAGARI LETTER SHOR T A

552 // \\u3041 ; Hiragana # Lo HIRAGANA LETTER SMALL A

553 // 1234 ; Common # ascii digits

554 // \\u0300 ; Inherited # Mn COMBINING GRAVE ACCENT

555

556 struct ScriptTest {

557 const char *fTestString;

558 int32_t fScriptCount;

559 } scriptTests[] = {

560 {"Hello", 1},

561 {"Hello\\u0370", 2},

562 {"1234", 0},

563 {"Hello1234\\u0300", 1}, // Common and Inherited are ignored.

564 {"\\u0030", 0},

565 {"abc\\u0951", 1},

566 {"abc\\u3013", 2},

567 {"\\uA838\\u0951", 1}, // Triggers commonAmongAlternates path.

568 {"\\u3013\\uA838", 2}

569 };

570

571 status = U_ZERO_ERROR;

572 IdentifierInfo identifierInfo(status);

573 for (testNum=0; testNum<UPRV_LENGTHOF(scriptTests); testNum++) {

574 ScriptTest &test = scriptTests[testNum];

575 char msgBuf[100];

576 sprintf(msgBuf, "testNum = %d ", testNum);

577 UnicodeString testString = UnicodeString(test.fTestString).unescape();

578

579 status = U_ZERO_ERROR;

580 identifierInfo.setIdentifier(testString, status);

581 int32_t scriptCount = identifierInfo.getScriptCount();

582 TEST_ASSERT_MSG(test.fScriptCount == scriptCount, msgBuf);

583 }

584 }

585

586 void IntlTestSpoof::testScriptSet() {

587 ScriptSet s1;

588 ScriptSet s2;

589 UErrorCode status = U_ZERO_ERROR;

590

591 TEST_ASSERT(s1 == s2);

592 s1.set(USCRIPT_ARABIC,status);

593 TEST_ASSERT_SUCCESS(status);

594 TEST_ASSERT(!(s1 == s2));

595 TEST_ASSERT(s1.test(USCRIPT_ARABIC, status));

596 TEST_ASSERT(s1.test(USCRIPT_GREEK, status) == FALSE);

597

598 status = U_ZERO_ERROR;

599 s1.reset(USCRIPT_ARABIC, status);

600 TEST_ASSERT(s1 == s2);

601

602 status = U_ZERO_ERROR;

603 s1.setAll();

604 TEST_ASSERT(s1.test(USCRIPT_COMMON, status));

605 TEST_ASSERT(s1.test(USCRIPT_ETHIOPIC, status));

606 TEST_ASSERT(s1.test(USCRIPT_CODE_LIMIT, status));

607 s1.resetAll();

608 TEST_ASSERT(!s1.test(USCRIPT_COMMON, status));

609 TEST_ASSERT(!s1.test(USCRIPT_ETHIOPIC, status));

610 TEST_ASSERT(!s1.test(USCRIPT_CODE_LIMIT, status));

611

612 status = U_ZERO_ERROR;

613 s1.set(USCRIPT_TAKRI, status);

614 s1.set(USCRIPT_BLISSYMBOLS, status);

615 s2.setAll();

616 TEST_ASSERT(s2.contains(s1));

617 TEST_ASSERT(!s1.contains(s2));

618 TEST_ASSERT(s2.intersects(s1));

619 TEST_ASSERT(s1.intersects(s2));

620 s2.reset(USCRIPT_TAKRI, status);

621 TEST_ASSERT(!s2.contains(s1));

622 TEST_ASSERT(!s1.contains(s2));

623 TEST_ASSERT(s1.intersects(s2));

624 TEST_ASSERT(s2.intersects(s1));

625 TEST_ASSERT_SUCCESS(status);

626

627 status = U_ZERO_ERROR;

628 s1.resetAll();

629 s1.set(USCRIPT_NKO, status);

630 s1.set(USCRIPT_COMMON, status);

631 s2 = s1;

632 TEST_ASSERT(s2 == s1);

633 TEST_ASSERT_EQ(2, s2.countMembers());

634 s2.intersect(s1);

635 TEST_ASSERT(s2 == s1);

636 s2.setAll();

637 TEST_ASSERT(!(s2 == s1));

638 TEST_ASSERT(s2.countMembers() >= USCRIPT_CODE_LIMIT);

639 s2.intersect(s1);

640 TEST_ASSERT(s2 == s1);

641

642 s2.setAll();

643 s2.reset(USCRIPT_COMMON, status);

644 s2.intersect(s1);

645 TEST_ASSERT(s2.countMembers() == 1);

646

647 s1.resetAll();

648 s1.set(USCRIPT_AFAKA, status);

649 s1.set(USCRIPT_VAI, status);

650 s1.set(USCRIPT_INHERITED, status);

651 int32_t n = -1;

652 for (int32_t i=0; i<4; i++) {

653 n = s1.nextSetBit(n+1);

654 switch (i) {

655 case 0: TEST_ASSERT_EQ(USCRIPT_INHERITED, n); break;

656 case 1: TEST_ASSERT_EQ(USCRIPT_VAI, n); break;

657 case 2: TEST_ASSERT_EQ(USCRIPT_AFAKA, n); break;

658 case 3: TEST_ASSERT_EQ(-1, (int32_t)n); break;

659 default: TEST_ASSERT(FALSE);

660 }

661 }

662 TEST_ASSERT_SUCCESS(status);

663 }

664

665

666 void IntlTestSpoof::testRestrictionLevel() {

667 struct Test {

668 const char *fId;

669 URestrictionLevel fExpectedRestrictionLevel;

670 } tests[] = {

671 {"\\u0061\\u03B3\\u2665", USPOOF_UNRESTRICTIVE},

672 {"a", USPOOF_ASCII},

673 {"\\u03B3", USPOOF_SINGLE_SCRIPT_RESTRICTIVE},

674 {"\\u0061\\u30A2\\u30FC", USPOOF_HIGHLY_RESTRICTIVE},

675 {"\\u0061\\u0904", USPOOF_MODERATELY_RESTRICTIVE},

676 {"\\u0061\\u03B3", USPOOF_MINIMALLY_RESTRICTIVE}

677 };

678 char msgBuffer[100];

679

680 URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT _RESTRICTIVE,

681 USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMA LLY_RESTRICTIVE,

682 USPOOF_UNRESTRICTIVE};

683

684 UErrorCode status = U_ZERO_ERROR;

685 IdentifierInfo idInfo(status);

686 TEST_ASSERT_SUCCESS(status);

687 idInfo.setIdentifierProfile(*uspoof_getRecommendedUnicodeSet(&status));

688 TEST_ASSERT_SUCCESS(status);

689 for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {

690 status = U_ZERO_ERROR;

691 const Test &test = tests[testNum];

692 UnicodeString testString = UnicodeString(test.fId).unescape();

693 URestrictionLevel expectedLevel = test.fExpectedRestrictionLevel;

694 idInfo.setIdentifier(testString, status);

695 sprintf(msgBuffer, "testNum = %d ", testNum);

696 TEST_ASSERT_SUCCESS(status);

697 TEST_ASSERT_MSG(expectedLevel == idInfo.getRestrictionLevel(status), msg Buffer);

698 for (int levelIndex=0; levelIndex<UPRV_LENGTHOF(restrictionLevels); leve lIndex++) {

699 status = U_ZERO_ERROR;

700 URestrictionLevel levelSetInSpoofChecker = restrictionLevels[levelIn dex];

701 USpoofChecker *sc = uspoof_open(&status);

702 uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status);

703 uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &statu s);

704 uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);

705 int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL , &status);

706

707 // we want to fail if the text is (say) MODERATE and the testLevel i s ASCII

708 int32_t expectedValue = 0;

709 if (expectedLevel > levelSetInSpoofChecker) {

710 expectedValue \|= USPOOF_RESTRICTION_LEVEL;

711 }

712 if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testStrin g)) {

713 expectedValue \|= USPOOF_CHAR_LIMIT;

714 }

715 sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, a ctual = %#x",

716 testNum, levelIndex, expectedValue, actualValue);

717 TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer);

718 TEST_ASSERT_SUCCESS(status);

719

720 // Run the same check again, with the Spoof Checker configured to re turn

721 // the actual restriction level.

722 uspoof_setChecks(sc, USPOOF_AUX_INFO \| USPOOF_RESTRICTION_LEVEL, &st atus);

723 uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &statu s);

724 uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);

725 int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &st atus);

726 TEST_ASSERT_SUCCESS(status);

727 if (U_SUCCESS(status)) {

728 TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_ MASK);

729 TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS);

730 }

731 uspoof_close(sc);

732 }

733 }

734 }

735

736

737 void IntlTestSpoof::testMixedNumbers() {

738 struct Test {

739 const char *fTestString;

740 const char *fExpectedSet;

741 } tests[] = {

742 {"1", "[0]"},

743 {"\\u0967", "[\\u0966]"},

744 {"1\\u0967", "[0\\u0966]"},

745 {"\\u0661\\u06F1", "[\\u0660\\u06F0]"}

746 };

747 UErrorCode status = U_ZERO_ERROR;

748 IdentifierInfo idInfo(status);

749 for (int32_t testNum=0; testNum < UPRV_LENGTHOF(tests); testNum++) {

750 char msgBuf[100];

751 sprintf(msgBuf, "testNum = %d ", testNum);

752 Test &test = tests[testNum];

753

754 status = U_ZERO_ERROR;

755 UnicodeString testString = UnicodeString(test.fTestString).unescape();

756 UnicodeSet expectedSet(UnicodeString(test.fExpectedSet).unescape(), stat us);

757 idInfo.setIdentifier(testString, status);

758 TEST_ASSERT_SUCCESS(status);

759 TEST_ASSERT_MSG(expectedSet == *idInfo.getNumerics(), msgBuf);

760

761 status = U_ZERO_ERROR;

762 USpoofChecker *sc = uspoof_open(&status);

763 uspoof_setChecks(sc, USPOOF_MIXED_NUMBERS, &status); // only check this

764 int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status );

765 UBool mixedNumberFailure = ((result & USPOOF_MIXED_NUMBERS) != 0);

766 TEST_ASSERT_MSG((expectedSet.size() > 1) == mixedNumberFailure, msgBuf);

767 uspoof_close(sc);

768 }

769 }

770

771 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */

OLD	NEW

« no previous file with comments | « source/test/intltest/itspoof.h ('k') | source/test/intltest/ittrans.h » ('j') | no next file with comments »