source/test/cintltst/cucdtst.c - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/cintltst/cucdtst.c

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /********************************************************************

2 * COPYRIGHT:

3 * Copyright (c) 1997-2015, International Business Machines Corporation and

4 * others. All Rights Reserved.

5 ********************************************************************/

6 /*******************************************************************************

7 *

8 * File CUCDTST.C

9 *

10 * Modification History:

11 * Name Description

12 * Madhu Katragadda Ported for C API, added tests for string functions

13 ********************************************************************************

14 */

15

16 #include <string.h>

17 #include <math.h>

18 #include <stdlib.h>

19

20 #include "unicode/utypes.h"

21 #include "unicode/uchar.h"

22 #include "unicode/putil.h"

23 #include "unicode/ustring.h"

24 #include "unicode/uloc.h"

25 #include "unicode/unorm2.h"

26

27 #include "cintltst.h"

28 #include "putilimp.h"

29 #include "uparse.h"

30 #include "ucase.h"

31 #include "ubidi_props.h"

32 #include "uprops.h"

33 #include "uset_imp.h"

34 #include "usc_impl.h"

35 #include "udatamem.h" /* for testing ucase_openBinary() */

36 #include "cucdapi.h"

37 #include "cmemory.h"

38

/* prototypes --------------------------------------------------------------- */

/* Test entry points defined in this file and registered by addUnicodeTest(). */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestUnicodeData(void);
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);
static void TestCharNames(void);
static void TestUCharFromNameUnderflow(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void);
static void TestCaseFolding(void);

/* internal methods used */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);

67

68 /* helpers ------------------------------------------------------------------ */

69

70 static void

71 parseUCDFile(const char *filename,

72 char *fields[][2], int32_t fieldCount,

73 UParseLineFn lineFn, void context,

74 UErrorCode *pErrorCode) {

75 char path[256];

76 char backupPath[256];

77

78 if(U_FAILURE(*pErrorCode)) {

79 return;

80 }

81

82 /* Look inside ICU_DATA first */

83 strcpy(path, u_getDataDirectory());

84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);

85 strcat(path, filename);

86

87 /* As a fallback, try to guess where the source data was located

88 * at the time ICU was built, and look there.

89 */

90 strcpy(backupPath, ctest_dataSrcDir());

91 strcat(backupPath, U_FILE_SEP_STRING);

92 strcat(backupPath, "unidata" U_FILE_SEP_STRING);

93 strcat(backupPath, filename);

94

95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorC ode);

96 if(*pErrorCode==U_FILE_ACCESS_ERROR) {

97 *pErrorCode=U_ZERO_ERROR;

98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, contex t, pErrorCode);

99 }

100 if(U_FAILURE(*pErrorCode)) {

101 log_err_status(pErrorCode, "error parsing %s: %s\n", filename, u_errorN ame(pErrorCode));

102 }

103 }

104

105 /* test data ---------------------------------------------------------------- */

106

/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The tag at character offset 2*k corresponds to entry k of tagValues[],
 * so the string must not contain any padding or whitespace.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";

108 static const int32_t tagValues[] =

109 {

110 /* Mn */ U_NON_SPACING_MARK,

111 /* Mc */ U_COMBINING_SPACING_MARK,

112 /* Me */ U_ENCLOSING_MARK,

113 /* Nd */ U_DECIMAL_DIGIT_NUMBER,

114 /* Nl */ U_LETTER_NUMBER,

115 /* No */ U_OTHER_NUMBER,

116 /* Zs */ U_SPACE_SEPARATOR,

117 /* Zl */ U_LINE_SEPARATOR,

118 /* Zp */ U_PARAGRAPH_SEPARATOR,

119 /* Cc */ U_CONTROL_CHAR,

120 /* Cf */ U_FORMAT_CHAR,

121 /* Cs */ U_SURROGATE,

122 /* Co */ U_PRIVATE_USE_CHAR,

123 /* Cn */ U_UNASSIGNED,

124 /* Lu */ U_UPPERCASE_LETTER,

125 /* Ll */ U_LOWERCASE_LETTER,

126 /* Lt */ U_TITLECASE_LETTER,

127 /* Lm */ U_MODIFIER_LETTER,

128 /* Lo */ U_OTHER_LETTER,

129 /* Pc */ U_CONNECTOR_PUNCTUATION,

130 /* Pd */ U_DASH_PUNCTUATION,

131 /* Ps */ U_START_PUNCTUATION,

132 /* Pe */ U_END_PUNCTUATION,

133 /* Po */ U_OTHER_PUNCTUATION,

134 /* Sm */ U_MATH_SYMBOL,

135 /* Sc */ U_CURRENCY_SYMBOL,

136 /* Sk */ U_MODIFIER_SYMBOL,

137 /* So */ U_OTHER_SYMBOL,

138 /* Pi */ U_INITIAL_PUNCTUATION,

139 /* Pf */ U_FINAL_PUNCTUATION

140 };

141

/*
 * Bidi class abbreviations as NUL-terminated strings of at most 4 characters.
 * NOTE(review): the order presumably mirrors the UCharDirection enum values
 * (MakeDir() is declared above but not visible here) - confirm before reordering.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};

168

169 void addUnicodeTest(TestNode** root);

170

171 void addUnicodeTest(TestNode** root)

172 {

173 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");

174 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");

175 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");

176 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");

177 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");

178 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalPrope rties");

179 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties" );

180 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");

181 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");

182 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");

183 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");

184 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");

185 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");

186 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");

187 addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromName Underflow");

188 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");

189 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");

190 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");

191 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensi ons");

192 addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI" );

193 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");

194 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");

195 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");

196 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");

197 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");

198 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");

199 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");

200 }

201

202 /==================================================== /

203 /* test u_toupper() and u_tolower() */

204 /==================================================== /

205 static void TestUpperLower()

206 {

207 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0 x01c9, 0x000c, 0x0000};

208 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0 x01c9, 0x000c, 0x0000};

209 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);

210 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);

211 int32_t i;

212

213 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);

214 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);

215

216 /*

217 Checks LetterLike Symbols which were previously a source of confusion

218 [Bertrand A. D. 02/04/98]

219 */

220 for (i=0x2100;i<0x2138;i++)

221 {

222 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */

223 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)

224 {

225 if (i != (int)u_tolower(i)) /* itself */

226 log_err("Failed case conversion with itself: U+%04x\n", i);

227 if (i != (int)u_toupper(i))

228 log_err("Failed case conversion with itself: U+%04x\n", i);

229 }

230 }

231

232 for(i=0; i < u_strlen(upper); i++){

233 if(u_tolower(upper[i]) != lower[i]){

234 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i ], lower[i], u_tolower(upper[i]));

235 }

236 }

237

238 log_verbose("testing upper lower\n");

239 for (i = 0; i < 21; i++) {

240

241 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))

242 {

243 log_err("Failed isLowerCase test at %c\n", upperTest[i]);

244 }

245 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))

246 {

247 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);

248 }

249 else if (upperTest[i] != u_tolower(lowerTest[i]))

250 {

251 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], u pperTest[i]);

252 }

253 else if (lowerTest[i] != u_toupper(upperTest[i]))

254 {

255 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerT est[i]);

256 }

257 else if (upperTest[i] != u_tolower(upperTest[i]))

258 {

259 log_err("Failed case conversion with itself: %c\n", upperTest[i]);

260 }

261 else if (lowerTest[i] != u_toupper(lowerTest[i]))

262 {

263 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);

264 }

265 }

266 log_verbose("done testing upper lower\n");

267

268 log_verbose("testing u_istitle\n");

269 {

270 static const UChar expected[] = {

271 0x1F88,

272 0x1F89,

273 0x1F8A,

274 0x1F8B,

275 0x1F8C,

276 0x1F8D,

277 0x1F8E,

278 0x1F8F,

279 0x1F88,

280 0x1F89,

281 0x1F8A,

282 0x1F8B,

283 0x1F8C,

284 0x1F8D,

285 0x1F8E,

286 0x1F8F,

287 0x1F98,

288 0x1F99,

289 0x1F9A,

290 0x1F9B,

291 0x1F9C,

292 0x1F9D,

293 0x1F9E,

294 0x1F9F,

295 0x1F98,

296 0x1F99,

297 0x1F9A,

298 0x1F9B,

299 0x1F9C,

300 0x1F9D,

301 0x1F9E,

302 0x1F9F,

303 0x1FA8,

304 0x1FA9,

305 0x1FAA,

306 0x1FAB,

307 0x1FAC,

308 0x1FAD,

309 0x1FAE,

310 0x1FAF,

311 0x1FA8,

312 0x1FA9,

313 0x1FAA,

314 0x1FAB,

315 0x1FAC,

316 0x1FAD,

317 0x1FAE,

318 0x1FAF,

319 0x1FBC,

320 0x1FBC,

321 0x1FCC,

322 0x1FCC,

323 0x1FFC,

324 0x1FFC,

325 };

326 int32_t num = sizeof(expected)/sizeof(expected[0]);

327 for(i=0; i<num; i++){

328 if(!u_istitle(expected[i])){

329 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n" ,expected[i]);

330 }

331 }

332

333 }

334 }

335

336 /* compare two sets and verify that their difference or intersection is empty */

337 static UBool

338 showADiffB(const USet a, const USet b,

339 const char a_name, const char b_name,

340 UBool expect, UBool diffIsError) {

341 USet *aa;

342 int32_t i, start, end, length;

343 UErrorCode errorCode;

344

345 /*

346 * expect:

347 * TRUE -> a-b should be empty, that is, b should contain all of a

348 * FALSE -> a&b should be empty, that is, a should contain none of b (and vi ce versa)

349 */

350 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {

351 return TRUE;

352 }

353

354 /* clone a to aa because a is const */

355 aa=uset_open(1, 0);

356 if(aa==NULL) {

357 /* unusual problem - out of memory? */

358 return FALSE;

359 }

360 uset_addAll(aa, a);

361

362 /* compute the set in question */

363 if(expect) {

364 /* a-b */

365 uset_removeAll(aa, b);

366 } else {

367 /* a&b */

368 uset_retainAll(aa, b);

369 }

370

371 /* aa is not empty because of the initial tests above; show its contents */

372 errorCode=U_ZERO_ERROR;

373 i=0;

374 for(;;) {

375 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);

376 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

377 break; /* done */

378 }

379 if(U_FAILURE(errorCode)) {

380 log_err("error comparing %s with %s at difference item %d: %s\n",

381 a_name, b_name, i, u_errorName(errorCode));

382 break;

383 }

384 if(length!=0) {

385 break; /* done with code points, got a string or -1 */

386 }

387

388 if(diffIsError) {

389 if(expect) {

390 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a _name, start, end, b_name);

391 } else {

392 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);

393 }

394 } else {

395 if(expect) {

396 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n" , a_name, start, end, b_name);

397 } else {

398 log_verbose("info: %s and %s both contain U+%04x..U+%04x but sho uld not intersect\n", a_name, b_name, start, end);

399 }

400 }

401

402 ++i;

403 }

404

405 uset_close(aa);

406 return FALSE;

407 }

408

409 static UBool

410 showAMinusB(const USet a, const USet b,

411 const char a_name, const char b_name,

412 UBool diffIsError) {

413 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);

414 }

415

416 static UBool

417 showAIntersectB(const USet a, const USet b,

418 const char a_name, const char b_name,

419 UBool diffIsError) {

420 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);

421 }

422

423 static UBool

424 compareUSets(const USet a, const USet b,

425 const char a_name, const char b_name,

426 UBool diffIsError) {

427 /*

428 * Use an arithmetic & not a logical && so that both branches

429 * are always taken and all differences are shown.

430 */

431 return

432 showAMinusB(a, b, a_name, b_name, diffIsError) &

433 showAMinusB(b, a, b_name, a_name, diffIsError);

434 }

435

436 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */

437 static void TestLetterNumber()

438 {

439 UChar i = 0x0000;

440

441 log_verbose("Testing for isalpha\n");

442 for (i = 0x0041; i < 0x005B; i++) {

443 if (!u_isalpha(i))

444 {

445 log_err("Failed isLetter test at %.4X\n", i);

446 }

447 }

448 for (i = 0x0660; i < 0x066A; i++) {

449 if (u_isalpha(i))

450 {

451 log_err("Failed isLetter test with numbers at %.4X\n", i);

452 }

453 }

454

455 log_verbose("Testing for isdigit\n");

456 for (i = 0x0660; i < 0x066A; i++) {

457 if (!u_isdigit(i))

458 {

459 log_verbose("Failed isNumber test at %.4X\n", i);

460 }

461 }

462

463 log_verbose("Testing for isalnum\n");

464 for (i = 0x0041; i < 0x005B; i++) {

465 if (!u_isalnum(i))

466 {

467 log_err("Failed isAlNum test at %.4X\n", i);

468 }

469 }

470 for (i = 0x0660; i < 0x066A; i++) {

471 if (!u_isalnum(i))

472 {

473 log_err("Failed isAlNum test at %.4X\n", i);

474 }

475 }

476

477 {

478 /*

479 * The following checks work only starting from Unicode 4.0.

480 * Check the version number here.

481 */

482 static UVersionInfo u401={ 4, 0, 1, 0 };

483 UVersionInfo version;

484 u_getUnicodeVersion(version);

485 if(version[0]<4 \|\| 0==memcmp(version, u401, 4)) {

486 return;

487 }

488 }

489

490 {

491 /*

492 * Sanity check:

493 * Verify that exactly the digit characters have decimal digit values.

494 * This assumption is used in the implementation of u_digit()

495 * (which checks nt=de)

496 * compared with the parallel java.lang.Character.digit()

497 * (which checks Nd).

498 *

499 * This was not true in Unicode 3.2 and earlier.

500 * Unicode 4.0 fixed discrepancies.

501 * Unicode 4.0.1 re-introduced problems in this area due to an

502 * unintentionally incomplete last-minute change.

503 */

504 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);

505 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);

506

507 USet digits, decimalValues;

508 UErrorCode errorCode;

509

510 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);

511 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);

512 errorCode=U_ZERO_ERROR;

513 digits=uset_openPattern(digitsPattern, 6, &errorCode);

514 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);

515

516 if(U_SUCCESS(errorCode)) {

517 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decima l:]", TRUE);

518 }

519

520 uset_close(digits);

521 uset_close(decimalValues);

522 }

523 }

524

525 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,

526 const UChar32 *sampleChars, int32_t sampleCharsL ength,

527 UBool expected) {

528 int32_t i;

529 for (i = 0; i < sampleCharsLength; ++i) {

530 UBool result = propFn(sampleChars[i]);

531 if (result != expected) {

532 log_err("error: character property function %s(U+%04x)=%d is wrong\n ",

533 propName, sampleChars[i], result);

534 }

535 }

536 }

537

538 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_issp ace()), isWhiteSpace(), u_CharDigitValue() */

539 static void TestMisc()

540 {

541 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x200 5};

542 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};

543 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};

544 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};

545 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};

546 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};

547 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8 , 0xFFF0};*/

548 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};

549 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};

550 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};

551 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};

552

553 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};

554

555 uint32_t mask;

556

557 int32_t i;

558 char icuVersion[U_MAX_VERSION_STRING_LENGTH];

559 UVersionInfo realVersion;

560

561 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);

562

563 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(samp leSpaces), TRUE);

564 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(s ampleNonSpaces), FALSE);

565

566 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",

567 sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);

568 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",

569 sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);

570

571 testSampleCharProps(u_isWhitespace, "u_isWhitespace",

572 sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRU E);

573 testSampleCharProps(u_isWhitespace, "u_isWhitespace",

574 sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces ), FALSE);

575

576 testSampleCharProps(u_isdefined, "u_isdefined",

577 sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);

578 testSampleCharProps(u_isdefined, "u_isdefined",

579 sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);

580

581 testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBa se), TRUE);

582 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampl eNonBase), FALSE);

583

584 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(samp leDigits), TRUE);

585 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(s ampleNonDigits), FALSE);

586

587 for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {

588 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {

589 log_err("error: u_charDigitValue(U+04x)=%d != %d\n",

590 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDi gitValues[i]);

591 }

592 }

593

594 /* Tests the ICU version #*/

595 u_getVersion(realVersion);

596 u_versionToString(realVersion, icuVersion);

597 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)

598 {

599 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERS ION, icuVersion);

600 }

601 #if defined(ICU_VERSION)

602 /* test only happens where we have configure.in with VERSION - sanity check. */

603 if(strcmp(U_ICU_VERSION, ICU_VERSION))

604 {

605 log_err("ICU version mismatch: Header says %s, build environment says %s .\n", U_ICU_VERSION, ICU_VERSION);

606 }

607 #endif

608

609 /* test U_GC_... */

610 if(

611 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK \|\|

612 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK \|\|

613 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK \|\|

614 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK \|\|

615 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK \|\|

616 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK

617 ) {

618 log_err("error: U_GET_GC_MASK does not work properly\n");

619 }

620

621 mask=0;

622 mask=(mask&~U_GC_CN_MASK)\|U_GC_CN_MASK;

623

624 mask=(mask&~U_GC_LU_MASK)\|U_GC_LU_MASK;

625 mask=(mask&~U_GC_LL_MASK)\|U_GC_LL_MASK;

626 mask=(mask&~U_GC_LT_MASK)\|U_GC_LT_MASK;

627 mask=(mask&~U_GC_LM_MASK)\|U_GC_LM_MASK;

628 mask=(mask&~U_GC_LO_MASK)\|U_GC_LO_MASK;

629

630 mask=(mask&~U_GC_MN_MASK)\|U_GC_MN_MASK;

631 mask=(mask&~U_GC_ME_MASK)\|U_GC_ME_MASK;

632 mask=(mask&~U_GC_MC_MASK)\|U_GC_MC_MASK;

633

634 mask=(mask&~U_GC_ND_MASK)\|U_GC_ND_MASK;

635 mask=(mask&~U_GC_NL_MASK)\|U_GC_NL_MASK;

636 mask=(mask&~U_GC_NO_MASK)\|U_GC_NO_MASK;

637

638 mask=(mask&~U_GC_ZS_MASK)\|U_GC_ZS_MASK;

639 mask=(mask&~U_GC_ZL_MASK)\|U_GC_ZL_MASK;

640 mask=(mask&~U_GC_ZP_MASK)\|U_GC_ZP_MASK;

641

642 mask=(mask&~U_GC_CC_MASK)\|U_GC_CC_MASK;

643 mask=(mask&~U_GC_CF_MASK)\|U_GC_CF_MASK;

644 mask=(mask&~U_GC_CO_MASK)\|U_GC_CO_MASK;

645 mask=(mask&~U_GC_CS_MASK)\|U_GC_CS_MASK;

646

647 mask=(mask&~U_GC_PD_MASK)\|U_GC_PD_MASK;

648 mask=(mask&~U_GC_PS_MASK)\|U_GC_PS_MASK;

649 mask=(mask&~U_GC_PE_MASK)\|U_GC_PE_MASK;

650 mask=(mask&~U_GC_PC_MASK)\|U_GC_PC_MASK;

651 mask=(mask&~U_GC_PO_MASK)\|U_GC_PO_MASK;

652

653 mask=(mask&~U_GC_SM_MASK)\|U_GC_SM_MASK;

654 mask=(mask&~U_GC_SC_MASK)\|U_GC_SC_MASK;

655 mask=(mask&~U_GC_SK_MASK)\|U_GC_SK_MASK;

656 mask=(mask&~U_GC_SO_MASK)\|U_GC_SO_MASK;

657

658 mask=(mask&~U_GC_PI_MASK)\|U_GC_PI_MASK;

659 mask=(mask&~U_GC_PF_MASK)\|U_GC_PF_MASK;

660

661 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff ffff)) {

662 log_err("error: problems with U_GC_XX_MASK constants\n");

663 }

664

665 mask=0;

666 mask=(mask&~U_GC_C_MASK)\|U_GC_C_MASK;

667 mask=(mask&~U_GC_L_MASK)\|U_GC_L_MASK;

668 mask=(mask&~U_GC_M_MASK)\|U_GC_M_MASK;

669 mask=(mask&~U_GC_N_MASK)\|U_GC_N_MASK;

670 mask=(mask&~U_GC_Z_MASK)\|U_GC_Z_MASK;

671 mask=(mask&~U_GC_P_MASK)\|U_GC_P_MASK;

672 mask=(mask&~U_GC_S_MASK)\|U_GC_S_MASK;

673

674 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffff ffff)) {

675 log_err("error: problems with U_GC_Y_MASK constants\n");

676 }

677 {

678 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x00 35,0x0036,0x0037,0x0038,0x0039 };

679 for(i=0; i<10; i++){

680 if(digit[i]!=u_forDigit(i,10)){

681 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n" ,i,digit[i],u_forDigit(i,10));

682 }

683 }

684 }

685

686 /* test u_digit() */

687 {

688 static const struct {

689 UChar32 c;

690 int8_t radix, value;

691 } data[]={

692 /* base 16 */

693 { 0x0031, 16, 1 },

694 { 0x0038, 16, 8 },

695 { 0x0043, 16, 12 },

696 { 0x0066, 16, 15 },

697 { 0x00e4, 16, -1 },

698 { 0x0662, 16, 2 },

699 { 0x06f5, 16, 5 },

700 { 0xff13, 16, 3 },

701 { 0xff41, 16, 10 },

702

703 /* base 8 */

704 { 0x0031, 8, 1 },

705 { 0x0038, 8, -1 },

706 { 0x0043, 8, -1 },

707 { 0x0066, 8, -1 },

708 { 0x00e4, 8, -1 },

709 { 0x0662, 8, 2 },

710 { 0x06f5, 8, 5 },

711 { 0xff13, 8, 3 },

712 { 0xff41, 8, -1 },

713

714 /* base 36 */

715 { 0x5a, 36, 35 },

716 { 0x7a, 36, 35 },

717 { 0xff3a, 36, 35 },

718 { 0xff5a, 36, 35 },

719

720 /* wrong radix values */

721 { 0x0031, 1, -1 },

722 { 0xff3a, 37, -1 }

723 };

724

725 for(i=0; i<UPRV_LENGTHOF(data); ++i) {

726 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {

727 log_err("u_digit(U+%04x, %d)=%d expected %d\n",

728 data[i].c,

729 data[i].radix,

730 u_digit(data[i].c, data[i].radix),

731 data[i].value);

732 }

733 }

734 }

735 }

736

737 /* test C/POSIX-style functions --------------------------------------------- */

738

/* bit flags: one bit per C/POSIX character class, in the order of posixClasses[] */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

756

757 /* C/POSIX-style functions, in the same order as the bit flags */

758 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

759

760 static const struct {

761 IsPOSIXClass *fn;

762 const char *name;

763 } posixClasses[]={

764 { u_isalpha, "isalpha" },

765 { u_islower, "islower" },

766 { u_isupper, "isupper" },

767 { u_isdigit, "isdigit" },

768 { u_isxdigit, "isxdigit" },

769 { u_isalnum, "isalnum" },

770 { u_ispunct, "ispunct" },

771 { u_isgraph, "isgraph" },

772 { u_isprint, "isprint" },

773 { u_isspace, "isspace" },

774 { u_isblank, "isblank" },

775 { u_iscntrl, "iscntrl" }

776 };

777

778 static const struct {

779 UChar32 c;

780 uint32_t posixResults;

781 } posixData[]={

782 { 0x0008, ISCN }, /* backspace */

783 { 0x0009, ISSP\|ISBL\|ISCN }, /* TAB */

784 { 0x000a, ISSP\| ISCN }, /* LF */

785 { 0x000c, ISSP\| ISCN }, /* FF */

786 { 0x000d, ISSP\| ISCN }, /* CR */

787 { 0x0020, ISPR\|ISSP\|ISBL }, /* space */

788 { 0x0021, ISPU\|ISGR\|ISPR }, /* ! */

789 { 0x0033, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* 3 */

790 { 0x0040, ISPU\|ISGR\|ISPR }, /* @ */

791 { 0x0041, ISAL\| ISUP\| ISXD\|ISAN\| ISGR\|ISPR }, /* A */

792 { 0x007a, ISAL\|ISLO\| ISAN\| ISGR\|ISPR }, /* z */

793 { 0x007b, ISPU\|ISGR\|ISPR }, /* { */

794 { 0x0085, ISSP\| ISCN }, /* NEL */

795 { 0x00a0, ISPR\|ISSP\|ISBL }, /* NBSP */

796 { 0x00a4, ISGR\|ISPR }, /* currency sign */

797 { 0x00e4, ISAL\|ISLO\| ISAN\| ISGR\|ISPR }, /* a-umlaut */

798 { 0x0300, ISGR\|ISPR }, /* combining grave */

799 { 0x0600, ISCN }, /* arabic number sign */

800 { 0x0627, ISAL\| ISAN\| ISGR\|ISPR }, /* alef */

801 { 0x0663, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* arabic 3 */

802 { 0x2002, ISPR\|ISSP\|ISBL }, /* en space */

803 { 0x2007, ISPR\|ISSP\|ISBL }, /* figure space */

804 { 0x2009, ISPR\|ISSP\|ISBL }, /* thin space */

805 { 0x200b, ISCN }, /* ZWSP */

806 /{ 0x200b, ISPR\|ISSP },/ /* ZWSP / / ZWSP became a control char in 4.0.1*/

807 { 0x200e, ISCN }, /* LRM */

808 { 0x2028, ISPR\|ISSP\| ISCN }, /* LS */

809 { 0x2029, ISPR\|ISSP\| ISCN }, /* PS */

810 { 0x20ac, ISGR\|ISPR }, /* Euro */

811 { 0xff15, ISDI\|ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth 5 */

812 { 0xff25, ISAL\| ISUP\| ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth E */

813 { 0xff35, ISAL\| ISUP\| ISAN\| ISGR\|ISPR }, /* fullwidth U */

814 { 0xff45, ISAL\|ISLO\| ISXD\|ISAN\| ISGR\|ISPR }, /* fullwidth e */

815 { 0xff55, ISAL\|ISLO\| ISAN\| ISGR\|ISPR } /* fullwidth u */

816 };

817

818 static void

819 TestPOSIX() {

820 uint32_t mask;

821 int32_t cl, i;

822 UBool expect;

823

824 mask=1;

825 for(cl=0; cl<12; ++cl) {

826 for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {

827 expect=(UBool)((posixData[i].posixResults&mask)!=0);

828 if(posixClasses[cl].fn(posixData[i].c)!=expect) {

829 log_err("u_%s(U+%04x)=%s is wrong\n",

830 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "T RUE");

831 }

832 }

833 mask<<=1;

834 }

835 }

836

837 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */

838 static void TestControlPrint()

839 {

840 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0 x202b};

841 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};

842 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};

843 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};

844 UChar32 c;

845

846 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sam pleControl), TRUE);

847 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF( sampleNonControl), FALSE);

848

849 testSampleCharProps(u_isprint, "u_isprint",

850 samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);

851 testSampleCharProps(u_isprint, "u_isprint",

852 sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), F ALSE);

853

854 /* test all ISO 8 controls */

855 for(c=0; c<=0x9f; ++c) {

856 if(c==0x20) {

857 /* skip ASCII graphic characters and continue with DEL */

858 c=0x7f;

859 }

860 if(!u_iscntrl(c)) {

861 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);

862 }

863 if(!u_isISOControl(c)) {

864 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);

865 }

866 if(u_isprint(c)) {

867 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);

868 }

869 }

870

871 /* test all Latin-1 graphic characters */

872 for(c=0x20; c<=0xff; ++c) {

873 if(c==0x7f) {

874 c=0xa0;

875 } else if(c==0xad) {

876 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not p rintable) */

877 ++c;

878 }

879 if(!u_isprint(c)) {

880 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n" , c);

881 }

882 }

883 }

884

885 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable ()*/

886 static void TestIdentifier()

887 {

888 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};

889 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};

890 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};

891 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};

892 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};

893 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};

894 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};

895 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};

896 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};

897 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};

898

899 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",

900 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRU E);

901 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",

902 sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart ), FALSE);

903

904 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

905 sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE) ;

906 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

907 sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);

908

909 /* IDPart should imply IDStart */

910 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",

911 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRU E);

912

913 testSampleCharProps(u_isIDStart, "u_isIDStart",

914 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart ), TRUE);

915 testSampleCharProps(u_isIDStart, "u_isIDStart",

916 sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeI DStart), FALSE);

917

918 testSampleCharProps(u_isIDPart, "u_isIDPart",

919 sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);

920 testSampleCharProps(u_isIDPart, "u_isIDPart",

921 sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeID Part), FALSE);

922

923 /* IDPart should imply IDStart */

924 testSampleCharProps(u_isIDPart, "u_isIDPart",

925 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart ), TRUE);

926

927 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",

928 sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);

929 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",

930 sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FAL SE);

931 }

932

933 /* for each line of UnicodeData.txt, check some of the properties */

934 typedef struct UnicodeDataContext {

935 #if UCONFIG_NO_NORMALIZATION

936 const void *dummy;

937 #else

938 const UNormalizer2 *nfc;

939 const UNormalizer2 *nfkc;

940 #endif

941 } UnicodeDataContext;

942

943 /*

944 * ### TODO

945 * This test fails incorrectly if the First or Last code point of a repetitive a rea

946 * is overridden, which is allowed and is encouraged for the PUAs.

947 * Currently, this means that both area First/Last and override lines are

948 * tested against the properties from the API,

949 * and the area boundary will not match and cause an error.

950 *

951 * This function should detect area boundaries and skip them for the test of ind ividual

952 * code points' properties.

953 * Then it should check that the areas contain all the same properties except wh ere overridden.

954 * For this, it would have had to set a flag for which code points were listed e xplicitly.

955 */

956 static void U_CALLCONV

957 unicodeDataLineFn(void *context,

958 char *fields[][2], int32_t fieldCount,

959 UErrorCode *pErrorCode)

960 {

961 char buffer[100];

962 const char *d;

963 char *end;

964 uint32_t value;

965 UChar32 c;

966 int32_t i;

967 int8_t type;

968 int32_t dt;

969 UChar dm[32], s[32];

970 int32_t dmLength, length;

971

972 #if !UCONFIG_NO_NORMALIZATION

973 const UNormalizer2 nfc, nfkc;

974 #endif

975

976 /* get the character code, field 0 */

977 c=strtoul(fields[0][0], &end, 16);

978 if(end<=fields[0][0] \|\| end!=fields[0][1]) {

979 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);

980 return;

981 }

982 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {

983 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);

984 return;

985 }

986

987 /* get general category, field 2 */

988 *fields[2][1]=0;

989 type = (int8_t)tagValues[MakeProp(fields[2][0])];

990 if(u_charType(c)!=type) {

991 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType( c), type);

992 }

993 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(t ype)) {

994 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_C ATEGORY_MASK)!=U_MASK(u_charType())\n", c);

995 }

996

997 /* get canonical combining class, field 3 */

998 value=strtoul(fields[3][0], &end, 10);

999 if(end<=fields[3][0] \|\| end!=fields[3][1]) {

1000 log_err("error: syntax error in field 3 at code 0x%lx\n", c);

1001 return;

1002 }

1003 if(value>255) {

1004 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);

1005 return;

1006 }

1007 #if !UCONFIG_NO_NORMALIZATION

1008 if(value!=u_getCombiningClass(c) \|\| value!=(uint32_t)u_getIntPropertyValue(c , UCHAR_CANONICAL_COMBINING_CLASS)) {

1009 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);

1010 }

1011 nfkc=((UnicodeDataContext *)context)->nfkc;

1012 if(value!=unorm2_getCombiningClass(nfkc, c)) {

1013 log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);

1014 }

1015 #endif

1016

1017 /* get BiDi category, field 4 */

1018 *fields[4][1]=0;

1019 i=MakeDir(fields[4][0]);

1020 if(i!=u_charDirection(c) \|\| i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {

1021 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u _charDirection(c), MakeDir(fields[4][0]), fields[4][0]);

1022 }

1023

1024 /* get Decomposition_Type & Decomposition_Mapping, field 5 */

1025 d=NULL;

1026 if(fields[5][0]==fields[5][1]) {

1027 /* no decomposition, except UnicodeData.txt omits Hangul syllable decomp ositions */

1028 if(c==0xac00 \|\| c==0xd7a3) {

1029 dt=U_DT_CANONICAL;

1030 } else {

1031 dt=U_DT_NONE;

1032 }

1033 } else {

1034 d=fields[5][0];

1035 *fields[5][1]=0;

1036 dt=UCHAR_INVALID_CODE;

1037 if(*d=='<') {

1038 end=strchr(++d, '>');

1039 if(end!=NULL) {

1040 *end=0;

1041 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);

1042 d=u_skipWhitespace(end+1);

1043 }

1044 } else {

1045 dt=U_DT_CANONICAL;

1046 }

1047 }

1048 if(dt>U_DT_NONE) {

1049 if(c==0xac00) {

1050 dm[0]=0x1100;

1051 dm[1]=0x1161;

1052 dm[2]=0;

1053 dmLength=2;

1054 } else if(c==0xd7a3) {

1055 dm[0]=0xd788;

1056 dm[1]=0x11c2;

1057 dm[2]=0;

1058 dmLength=2;

1059 } else {

1060 dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);

1061 }

1062 } else {

1063 dmLength=-1;

1064 }

1065 if(dt<0 \|\| U_FAILURE(*pErrorCode)) {

1066 log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);

1067 return;

1068 }

1069 #if !UCONFIG_NO_NORMALIZATION

1070 i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);

1071 if(i!=dt) {

1072 log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE) ==%d instead of %d\n", c, i, dt);

1073 }

1074 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */

1075 length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);

1076 if(U_FAILURE(*pErrorCode) \|\| length!=dmLength \|\| (length>0 && 0!=u_strcmp(s, dm))) {

1077 log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "

1078 "or the Decomposition_Mapping is different (%s)\n",

1079 c, length, dmLength, u_errorName(*pErrorCode));

1080 return;

1081 }

1082 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRa wDecomposition(c). */

1083 if(dt!=U_DT_CANONICAL) {

1084 dmLength=-1;

1085 }

1086 nfc=((UnicodeDataContext *)context)->nfc;

1087 length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);

1088 if(U_FAILURE(*pErrorCode) \|\| length!=dmLength \|\| (length>0 && 0!=u_strcmp(s, dm))) {

1089 log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "

1090 "or the Decomposition_Mapping is different (%s)\n",

1091 c, length, dmLength, u_errorName(*pErrorCode));

1092 return;

1093 }

1094 /* recompose */

1095 if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCL USION)) {

1096 UChar32 a, b, composite;

1097 i=0;

1098 U16_NEXT(dm, i, dmLength, a);

1099 U16_NEXT(dm, i, dmLength, b);

1100 /* i==dmLength */

1101 composite=unorm2_composePair(nfc, a, b);

1102 if(composite!=c) {

1103 log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does n ot compose back (instead U+%04lX)\n",

1104 (long)c, (long)a, (long)b, (long)composite);

1105 }

1106 /*

1107 * Note: NFKC has fewer round-trip mappings than NFC,

1108 * so we can't just test unorm2_composePair(nfkc, a, b) here without fur ther data.

1109 */

1110 }

1111 #endif

1112

1113 /* get ISO Comment, field 11 */

1114 *fields[11][1]=0;

1115 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);

1116 if(U_FAILURE(*pErrorCode) \|\| 0!=strcmp(fields[11][0], buffer)) {

1117 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",

1118 c, u_errorName(*pErrorCode),

1119 U_FAILURE(*pErrorCode) ? buffer : "[error]",

1120 fields[11][0]);

1121 }

1122

1123 /* get uppercase mapping, field 12 */

1124 if(fields[12][0]!=fields[12][1]) {

1125 value=strtoul(fields[12][0], &end, 16);

1126 if(end!=fields[12][1]) {

1127 log_err("error: syntax error in field 12 at code 0x%lx\n", c);

1128 return;

1129 }

1130 if((UChar32)value!=u_toupper(c)) {

1131 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_toupper(c), value);

1132 }

1133 } else {

1134 /* no case mapping: the API must map the code point to itself */

1135 if(c!=u_toupper(c)) {

1136 log_err("error: U+%04lx does not have an uppercase mapping but u_tou pper()==U+%04lx\n", c, u_toupper(c));

1137 }

1138 }

1139

1140 /* get lowercase mapping, field 13 */

1141 if(fields[13][0]!=fields[13][1]) {

1142 value=strtoul(fields[13][0], &end, 16);

1143 if(end!=fields[13][1]) {

1144 log_err("error: syntax error in field 13 at code 0x%lx\n", c);

1145 return;

1146 }

1147 if((UChar32)value!=u_tolower(c)) {

1148 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_tolower(c), value);

1149 }

1150 } else {

1151 /* no case mapping: the API must map the code point to itself */

1152 if(c!=u_tolower(c)) {

1153 log_err("error: U+%04lx does not have a lowercase mapping but u_tolo wer()==U+%04lx\n", c, u_tolower(c));

1154 }

1155 }

1156

1157 /* get titlecase mapping, field 14 */

1158 if(fields[14][0]!=fields[14][1]) {

1159 value=strtoul(fields[14][0], &end, 16);

1160 if(end!=fields[14][1]) {

1161 log_err("error: syntax error in field 14 at code 0x%lx\n", c);

1162 return;

1163 }

1164 if((UChar32)value!=u_totitle(c)) {

1165 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c , u_totitle(c), value);

1166 }

1167 } else {

1168 /* no case mapping: the API must map the code point to itself */

1169 if(c!=u_totitle(c)) {

1170 log_err("error: U+%04lx does not have a titlecase mapping but u_toti tle()==U+%04lx\n", c, u_totitle(c));

1171 }

1172 }

1173 }

1174

1175 static UBool U_CALLCONV

1176 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory t ype) {

1177 static const UChar32 test[][2]={

1178 {0x41, U_UPPERCASE_LETTER},

1179 {0x308, U_NON_SPACING_MARK},

1180 {0xfffe, U_GENERAL_OTHER_TYPES},

1181 {0xe0041, U_FORMAT_CHAR},

1182 {0xeffff, U_UNASSIGNED}

1183 };

1184

1185 int32_t i, count;

1186

1187 if(0!=strcmp((const char *)context, "a1")) {

1188 log_err("error: u_enumCharTypes() passes on an incorrect context pointer \n");

1189 return FALSE;

1190 }

1191

1192 count=UPRV_LENGTHOF(test);

1193 for(i=0; i<count; ++i) {

1194 if(start<=test[i][0] && test[i][0]<limit) {

1195 if(type!=(UCharCategory)test[i][1]) {

1196 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ w ith %ld instead of U+%04lx with %ld\n",

1197 start, limit, (long)type, test[i][0], test[i][1]);

1198 }

1199 /* stop at the range that includes the last test code point (increas es code coverage for enumeration) */

1200 return i==(count-1) ? FALSE : TRUE;

1201 }

1202 }

1203

1204 if(start>test[count-1][0]) {

1205 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",

1206 start, limit, (long)type);

1207 return FALSE;

1208 }

1209

1210 return TRUE;

1211 }

1212

1213 static UBool U_CALLCONV

1214 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCatego ry type) {

1215 /* default Bidi classes for unassigned code points, from the DerivedBidiClas s.txt header */

1216 static const int32_t defaultBidi[][2]={ /* { limit, class } */

1217 { 0x0590, U_LEFT_TO_RIGHT },

1218 { 0x0600, U_RIGHT_TO_LEFT },

1219 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },

1220 { 0x08A0, U_RIGHT_TO_LEFT },

1221 { 0x0900, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08 FF from R to AL */

1222 { 0x20A0, U_LEFT_TO_RIGHT },

1223 { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the cu rrency symbols block U+20A0..U+20CF to default to ET not L */

1224 { 0xFB1D, U_LEFT_TO_RIGHT },

1225 { 0xFB50, U_RIGHT_TO_LEFT },

1226 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },

1227 { 0xFE70, U_LEFT_TO_RIGHT },

1228 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },

1229 { 0x10800, U_LEFT_TO_RIGHT },

1230 { 0x11000, U_RIGHT_TO_LEFT },

1231 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+ 1E800 - U+1EFFF */

1232 { 0x1EE00, U_RIGHT_TO_LEFT },

1233 { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+ 1EEFF from R to AL */

1234 { 0x1F000, U_RIGHT_TO_LEFT },

1235 { 0x110000, U_LEFT_TO_RIGHT }

1236 };

1237

1238 UChar32 c;

1239 int32_t i;

1240 UCharDirection shouldBeDir;

1241

1242 /*

1243 * LineBreak.txt specifies:

1244 * # - Assigned characters that are not listed explicitly are given the v alue

1245 * # "AL".

1246 * # - Unassigned characters are given the value "XX".

1247 *

1248 * PUA characters are listed explicitly with "XX".

1249 * Verify that no assigned character has "XX".

1250 */

1251 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {

1252 c=start;

1253 while(c<limit) {

1254 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {

1255 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);

1256 }

1257 ++c;

1258 }

1259 }

1260

1261 /*

1262 * Verify default Bidi classes.

1263 * For recent Unicode versions, see UCD.html.

1264 *

1265 * For older Unicode versions:

1266 * See table 3-7 "Bidirectional Character Types" in UAX #9.

1267 * http://www.unicode.org/reports/tr9/

1268 *

1269 * See also DerivedBidiClass.txt for Cn code points!

1270 *

1271 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/reso lved-pri.html)

1272 * changed some default values.

1273 * In particular, non-characters and unassigned Default Ignorable Code Point s

1274 * change from L to BN.

1275 *

1276 * UCD.html version 4.0.1 does not yet reflect these changes.

1277 */

1278 if(type==U_UNASSIGNED \|\| type==U_PRIVATE_USE_CHAR) {

1279 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */

1280 c=start;

1281 for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {

1282 if((int32_t)c<defaultBidi[i][0]) {

1283 while(c<limit && (int32_t)c<defaultBidi[i][0]) {

1284 if(U_IS_UNICODE_NONCHAR(c) \|\| u_hasBinaryProperty(c, UCHAR_D EFAULT_IGNORABLE_CODE_POINT)) {

1285 shouldBeDir=U_BOUNDARY_NEUTRAL;

1286 } else {

1287 shouldBeDir=(UCharDirection)defaultBidi[i][1];

1288 }

1289

1290 if( u_charDirection(c)!=shouldBeDir \|\|

1291 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir

1292 ) {

1293 log_err("error: u_charDirection(unassigned/PUA U+%04lx)= %s should be %s\n",

1294 c, dirStrings[u_charDirection(c)], dirStrings[should BeDir]);

1295 }

1296 ++c;

1297 }

1298 }

1299 }

1300 }

1301

1302 return TRUE;

1303 }

1304

1305 /* tests for several properties */

1306 static void TestUnicodeData()

1307 {

1308 UVersionInfo expectVersionArray;

1309 UVersionInfo versionArray;

1310 char *fields[15][2];

1311 UErrorCode errorCode;

1312 UChar32 c;

1313 int8_t type;

1314

1315 UnicodeDataContext context;

1316

1317 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);

1318 u_getUnicodeVersion(versionArray);

1319 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)

1320 {

1321 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",

1322 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);

1323 }

1324

1325 #if defined(ICU_UNICODE_VERSION)

1326 /* test only happens where we have configure.in with UNICODE_VERSION - sanit y check. */

1327 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))

1328 {

1329 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNIC ODE_VERSION " got " ICU_UNICODE_VERSION "\n");

1330 }

1331 #endif

1332

1333 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN \|\| u_getIntPropertyV alue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {

1334 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \ n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));

1335 }

1336

1337 errorCode=U_ZERO_ERROR;

1338 #if !UCONFIG_NO_NORMALIZATION

1339 context.nfc=unorm2_getNFCInstance(&errorCode);

1340 context.nfkc=unorm2_getNFKCInstance(&errorCode);

1341 if(U_FAILURE(errorCode)) {

1342 log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));

1343 return;

1344 }

1345 #endif

1346 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &er rorCode);

1347 if(U_FAILURE(errorCode)) {

1348 return; /* if we couldn't parse UnicodeData.txt, we should return */

1349 }

1350

1351 /* sanity check on repeated properties */

1352 for(c=0xfffe; c<=0x10ffff;) {

1353 type=u_charType(c);

1354 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA SK(type)) {

1355 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);

1356 }

1357 if(type!=U_UNASSIGNED) {

1358 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c , u_charType(c));

1359 }

1360 if((c&0xffff)==0xfffe) {

1361 ++c;

1362 } else {

1363 c+=0xffff;

1364 }

1365 }

1366

1367 /* test that PUA is not "unassigned" */

1368 for(c=0xe000; c<=0x10fffd;) {

1369 type=u_charType(c);

1370 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MA SK(type)) {

1371 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENER AL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);

1372 }

1373 if(type==U_UNASSIGNED) {

1374 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);

1375 } else if(type!=U_PRIVATE_USE_CHAR) {

1376 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);

1377 }

1378 if(c==0xf8ff) {

1379 c=0xf0000;

1380 } else if(c==0xffffd) {

1381 c=0x100000;

1382 } else {

1383 ++c;

1384 }

1385 }

1386

1387 /* test u_enumCharTypes() */

1388 u_enumCharTypes(enumTypeRange, "a1");

1389

1390 /* check default properties */

1391 u_enumCharTypes(enumDefaultsRange, NULL);

1392 }

1393

1394 static void TestCodeUnit(){

1395 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xd bff,0xdc00,0xdc02,0xddee,0xdfff,0};

1396

1397 int32_t i;

1398

1399 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){

1400 UChar c=codeunit[i];

1401 if(i<4){

1402 if(!(UTF_IS_SINGLE(c)) \|\| (UTF_IS_LEAD(c)) \|\| (UTF_IS_TRAIL(c)) \|\|(U TF_IS_SURROGATE(c))){

1403 log_err("ERROR: U+%04x is a single", c);

1404 }

1405

1406 }

1407 if(i >= 4 && i< 8){

1408 if(!(UTF_IS_LEAD(c)) \|\| UTF_IS_SINGLE(c) \|\| UTF_IS_TRAIL(c) \|\| !(UTF _IS_SURROGATE(c))){

1409 log_err("ERROR: U+%04x is a first surrogate", c);

1410 }

1411 }

1412 if(i >= 8 && i< 12){

1413 if(!(UTF_IS_TRAIL(c)) \|\| UTF_IS_SINGLE(c) \|\| UTF_IS_LEAD(c) \|\| !(UTF _IS_SURROGATE(c))){

1414 log_err("ERROR: U+%04x is a second surrogate", c);

1415 }

1416 }

1417 }

1418

1419 }

1420

1421 static void TestCodePoint(){

1422 const UChar32 codePoint[]={

1423 /surrogate, notvalid(codepoint), not a UnicodeChar, not Error /

1424 0xd800,

1425 0xdbff,

1426 0xdc00,

1427 0xdfff,

1428 0xdc04,

1429 0xd821,

1430 /not a surrogate, valid, isUnicodeChar , not Error/

1431 0x20ac,

1432 0xd7ff,

1433 0xe000,

1434 0xe123,

1435 0x0061,

1436 0xe065,

1437 0x20402,

1438 0x24506,

1439 0x23456,

1440 0x20402,

1441 0x10402,

1442 0x23456,

1443 /not a surrogate, not valid, isUnicodeChar, isError /

1444 0x0015,

1445 0x009f,

1446 /not a surrogate, not valid, not isUnicodeChar, isError /

1447 0xffff,

1448 0xfffe,

1449 };

1450 int32_t i;

1451 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){

1452 UChar32 c=codePoint[i];

1453 if(i<6){

1454 if(!UTF_IS_SURROGATE(c) \|\| !U_IS_SURROGATE(c) \|\| !U16_IS_SURROGATE(c )){

1455 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

1456 }

1457 if(UTF_IS_VALID(c)){

1458 log_err("ERROR: isValid() failed for U+%04x\n", c);

1459 }

1460 if(UTF_IS_UNICODE_CHAR(c) \|\| U_IS_UNICODE_CHAR(c)){

1461 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

1462 }

1463 if(UTF_IS_ERROR(c)){

1464 log_err("ERROR: isError() failed for U+%04x\n", c);

1465 }

1466 }else if(i >=6 && i<18){

1467 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

1468 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

1469 }

1470 if(!UTF_IS_VALID(c)){

1471 log_err("ERROR: isValid() failed for U+%04x\n", c);

1472 }

1473 if(!UTF_IS_UNICODE_CHAR(c) \|\| !U_IS_UNICODE_CHAR(c)){

1474 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

1475 }

1476 if(UTF_IS_ERROR(c)){

1477 log_err("ERROR: isError() failed for U+%04x\n", c);

1478 }

1479 }else if(i >=18 && i<20){

1480 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

1481 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

1482 }

1483 if(UTF_IS_VALID(c)){

1484 log_err("ERROR: isValid() failed for U+%04x\n", c);

1485 }

1486 if(!UTF_IS_UNICODE_CHAR(c) \|\| !U_IS_UNICODE_CHAR(c)){

1487 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

1488 }

1489 if(!UTF_IS_ERROR(c)){

1490 log_err("ERROR: isError() failed for U+%04x\n", c);

1491 }

1492 }

1493 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){

1494 if(UTF_IS_SURROGATE(c) \|\| U_IS_SURROGATE(c) \|\| U16_IS_SURROGATE(c)){

1495 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);

1496 }

1497 if(UTF_IS_VALID(c)){

1498 log_err("ERROR: isValid() failed for U+%04x\n", c);

1499 }

1500 if(UTF_IS_UNICODE_CHAR(c) \|\| U_IS_UNICODE_CHAR(c)){

1501 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);

1502 }

1503 if(!UTF_IS_ERROR(c)){

1504 log_err("ERROR: isError() failed for U+%04x\n", c);

1505 }

1506 }

1507 }

1508

1509 if(

1510 !U_IS_BMP(0) \|\| !U_IS_BMP(0x61) \|\| !U_IS_BMP(0x20ac) \|\|

1511 !U_IS_BMP(0xd9da) \|\| !U_IS_BMP(0xdfed) \|\| !U_IS_BMP(0xffff) \|\|

1512 U_IS_BMP(U_SENTINEL) \|\| U_IS_BMP(0x10000) \|\| U_IS_BMP(0x50005) \|\|

1513 U_IS_BMP(0x10ffff) \|\| U_IS_BMP(0x110000) \|\| U_IS_BMP(0x7fffffff)

1514 ) {

1515 log_err("error with U_IS_BMP()\n");

1516 }

1517

1518 if(

1519 U_IS_SUPPLEMENTARY(0) \|\| U_IS_SUPPLEMENTARY(0x61) \|\| U_IS_SUPPLEMENTARY( 0x20ac) \|\|

1520 U_IS_SUPPLEMENTARY(0xd9da) \|\| U_IS_SUPPLEMENTARY(0xdfed) \|\| U_IS_SUPPLEM ENTARY(0xffff) \|\|

1521 U_IS_SUPPLEMENTARY(U_SENTINEL) \|\| !U_IS_SUPPLEMENTARY(0x10000) \|\| !U_IS_ SUPPLEMENTARY(0x50005) \|\|

1522 !U_IS_SUPPLEMENTARY(0x10ffff) \|\| U_IS_SUPPLEMENTARY(0x110000) \|\| U_IS_SU PPLEMENTARY(0x7fffffff)

1523 ) {

1524 log_err("error with U_IS_SUPPLEMENTARY()\n");

1525 }

1526 }

1527

1528 static void TestCharLength()

1529 {

1530 const int32_t codepoint[]={

1531 1, 0x0061,

1532 1, 0xe065,

1533 1, 0x20ac,

1534 2, 0x20402,

1535 2, 0x23456,

1536 2, 0x24506,

1537 2, 0x20402,

1538 2, 0x10402,

1539 1, 0xd7ff,

1540 1, 0xe000

1541 };

1542

1543 int32_t i;

1544 UBool multiple;

1545 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+ 2)){

1546 UChar32 c=codepoint[i+1];

1547 if(UTF_CHAR_LENGTH(c) != codepoint[i] \|\| U16_LENGTH(c) != codepoint[i]){

1548 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));

1549 }

1550 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);

1551 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){

1552 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c );

1553 }

1554 }

1555 }

1556

/* internal functions ---- */

1558 static int32_t MakeProp(char* str)

1559 {

1560 int32_t result = 0;

1561 char* matchPosition =0;

1562

1563 matchPosition = strstr(tagStrings, str);

1564 if (matchPosition == 0)

1565 {

1566 log_err("unrecognized type letter ");

1567 log_err(str);

1568 }

1569 else

1570 result = (int32_t)((matchPosition - tagStrings) / 2);

1571 return result;

1572 }

1573

1574 static int32_t MakeDir(char* str)

1575 {

1576 int32_t pos = 0;

1577 for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {

1578 if (strcmp(str, dirStrings[pos]) == 0) {

1579 return pos;

1580 }

1581 }

1582 return -1;

1583 }

1584

1585 /* test u_charName() -------------------------------------------------------- */

1586

/*
 * Expected character names for a sample of code points.
 * name is the modern name, oldName the Unicode 1.0 name, extName the extended
 * name, and alias the name alias. Rows with only three strings leave alias NULL,
 * which the code reading this table checks for.
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
    {0x01a2, "LATIN CAPITAL LETTER OI", "",
             "LATIN CAPITAL LETTER OI",
             "LATIN CAPITAL LETTER GHA"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
    {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
             "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
             "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
    {0xd800, "", "", "<lead surrogate-D800>" },
    {0xdc00, "", "", "<trail surrogate-DC00>" },
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
    {0xffff, "", "", "<noncharacter-FFFF>" },
    {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
              "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
              "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
};

1614

1615 static UBool

1616 enumCharNamesFn(void *context,

1617 UChar32 code, UCharNameChoice nameChoice,

1618 const char *name, int32_t length) {

1619 int32_t pCount=(int32_t )context;

1620 const char *expected;

1621 int i;

1622

1623 if(length<=0 \|\| length!=(int32_t)strlen(name)) {

1624 /* should not be called with an empty string or invalid length */

1625 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);

1626 return TRUE;

1627 }

1628

1629 ++*pCount;

1630 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {

1631 if(code==(UChar32)names[i].code) {

1632 switch (nameChoice) {

1633 case U_EXTENDED_CHAR_NAME:

1634 if(0!=strcmp(name, names[i].extName)) {

1635 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);

1636 }

1637 break;

1638 case U_UNICODE_CHAR_NAME:

1639 if(0!=strcmp(name, names[i].name)) {

1640 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code , name, names[i].name);

1641 }

1642 break;

1643 case U_UNICODE_10_CHAR_NAME:

1644 expected=names[i].oldName;

1645 if(expected[0]==0 \|\| 0!=strcmp(name, expected)) {

1646 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n" , code, name, expected);

1647 }

1648 break;

1649 case U_CHAR_NAME_ALIAS:

1650 expected=names[i].alias;

1651 if(expected==NULL \|\| expected[0]==0 \|\| 0!=strcmp(name, expec ted)) {

1652 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\ n", code, name, expected);

1653 }

1654 break;

1655 case U_CHAR_NAME_CHOICE_COUNT:

1656 break;

1657 }

1658 break;

1659 }

1660 }

1661 return TRUE;

1662 }

1663

1664 struct enumExtCharNamesContext {

1665 uint32_t length;

1666 int32_t last;

1667 };

1668

1669 static UBool

1670 enumExtCharNamesFn(void *context,

1671 UChar32 code, UCharNameChoice nameChoice,

1672 const char *name, int32_t length) {

1673 struct enumExtCharNamesContext ecncp = (struct enumExtCharNamesContext ) c ontext;

1674

1675 if (ecncp->last != (int32_t) code - 1) {

1676 if (ecncp->last < 0) {

1677 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ex t) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1 );

1678 } else {

1679 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);

1680 }

1681 }

1682 ecncp->last = (int32_t) code;

1683

1684 if (!*name) {

1685 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", c ode);

1686 }

1687

1688 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);

1689 }

1690

1691 /**

1692 * This can be made more efficient by moving it into putil.c and having

1693 * it directly access the ebcdic translation tables.

1694 * TODO: If we get this method in putil.c, then delete it from here.

1695 */

1696 static UChar

1697 u_charToUChar(char c) {

1698 UChar uc;

1699 u_charsToUChars(&c, &uc, 1);

1700 return uc;

1701 }

1702

1703 static void

1704 TestCharNames() {

1705 static char name[80];

1706 UErrorCode errorCode=U_ZERO_ERROR;

1707 struct enumExtCharNamesContext extContext;

1708 const char *expected;

1709 int32_t length;

1710 UChar32 c;

1711 int32_t i;

1712

1713 log_verbose("Testing uprv_getMaxCharNameLength()\n");

1714 length=uprv_getMaxCharNameLength();

1715 if(length==0) {

1716 /* no names data available */

1717 return;

1718 }

1719 if(length<83) { /* Unicode 3.2 max char name length */

1720 log_err("uprv_getMaxCharNameLength()=%d is too short");

1721 }

1722 /* ### TODO same tests for max ISO comment length as for max name length */

1723

1724 log_verbose("Testing u_charName()\n");

1725 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {

1726 /* modern Unicode character name */

1727 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name) , &errorCode);

1728 if(U_FAILURE(errorCode)) {

1729 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(e rrorCode));

1730 return;

1731 }

1732 if(length<0 \|\| 0!=strcmp(name, names[i].name) \|\| length!=(uint16_t)strle n(name)) {

1733 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);

1734 }

1735

1736 /* find the modern name */

1737 if (*names[i].name) {

1738 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);

1739 if(U_FAILURE(errorCode)) {

1740 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorN ame(errorCode));

1741 return;

1742 }

1743 if(c!=(UChar32)names[i].code) {

1744 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", name s[i].name, c, names[i].code);

1745 }

1746 }

1747

1748 /* Unicode 1.0 character name */

1749 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(na me), &errorCode);

1750 if(U_FAILURE(errorCode)) {

1751 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_error Name(errorCode));

1752 return;

1753 }

1754 if(length<0 \|\| (length>0 && 0!=strcmp(name, names[i].oldName)) \|\| length !=(uint16_t)strlen(name)) {

1755 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothi ng or %s\n", names[i].code, name, length, names[i].oldName);

1756 }

1757

1758 /* find the Unicode 1.0 name if it is stored (length>0 means that we cou ld read it) */

1759 if(names[i].oldName[0]!=0 /* && length>0 */) {

1760 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCod e);

1761 if(U_FAILURE(errorCode)) {

1762 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));

1763 return;

1764 }

1765 if(c!=(UChar32)names[i].code) {

1766 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n" , names[i].oldName, c, names[i].code);

1767 }

1768 }

1769

1770 /* Unicode character name alias */

1771 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);

1772 if(U_FAILURE(errorCode)) {

1773 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_err orName(errorCode));

1774 return;

1775 }

1776 expected=names[i].alias;

1777 if(expected==NULL) {

1778 expected="";

1779 }

1780 if(length<0 \|\| (length>0 && 0!=strcmp(name, expected)) \|\| length!=(uint1 6_t)strlen(name)) {

1781 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of not hing or %s\n",

1782 names[i].code, name, length, expected);

1783 }

1784

1785 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */

1786 if(expected[0]!=0 /* && length>0 */) {

1787 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);

1788 if(U_FAILURE(errorCode)) {

1789 log_err("u_charFromName(%s - alias) error %s\n",

1790 expected, u_errorName(errorCode));

1791 return;

1792 }

1793 if(c!=(UChar32)names[i].code) {

1794 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\ n",

1795 expected, c, names[i].code);

1796 }

1797 }

1798 }

1799

1800 /* test u_enumCharNames() */

1801 length=0;

1802 errorCode=U_ZERO_ERROR;

1803 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &leng th, U_UNICODE_CHAR_NAME, &errorCode);

1804 if(U_FAILURE(errorCode) \|\| length<94140) {

1805 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MI N_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);

1806 }

1807

1808 extContext.length = 0;

1809 extContext.last = -1;

1810 errorCode=U_ZERO_ERROR;

1811 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &e xtContext, U_EXTENDED_CHAR_NAME, &errorCode);

1812 if(U_FAILURE(errorCode) \|\| extContext.length<UCHAR_MAX_VALUE + 1) {

1813 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld \n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.le ngth);

1814 }

1815

1816 /* test that u_charFromName() uppercases the input name, i.e., works with mi xed-case names (new in 2.0) */

1817 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorC ode)) {

1818 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") d id not find U+0061 (%s)\n", u_errorName(errorCode));

1819 }

1820

1821 /* Test getCharNameCharacters */

1822 if(!getTestOption(QUICK_OPTION)) {

1823 enum { BUFSIZE = 256 };

1824 UErrorCode ec = U_ZERO_ERROR;

1825 char buf[BUFSIZE];

1826 int32_t maxLength;

1827 UChar32 cp;

1828 UChar pat[BUFSIZE], dumbPat[BUFSIZE];

1829 int32_t l1, l2;

1830 UBool map[256];

1831 UBool ok;

1832

1833 USet* set = uset_open(1, 0); /* empty set */

1834 USet* dumb = uset_open(1, 0); /* empty set */

1835

1836 /*

1837 * uprv_getCharNameCharacters() will likely return more lowercase

1838 * letters than actual character names contain because

1839 * it includes all the characters in lowercased names of

1840 * general categories, for the full possible set of extended names.

1841 */

1842 {

1843 USetAdder sa={

1844 NULL,

1845 uset_add,

1846 uset_addRange,

1847 uset_addString,

1848 NULL /* don't need remove() */

1849 };

1850 sa.set=set;

1851 uprv_getCharNameCharacters(&sa);

1852 }

1853

1854 /* build set the dumb (but sure-fire) way */

1855 for (i=0; i<256; ++i) {

1856 map[i] = FALSE;

1857 }

1858

1859 maxLength=0;

1860 for (cp=0; cp<0x110000; ++cp) {

1861 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,

1862 buf, BUFSIZE, &ec);

1863 if (U_FAILURE(ec)) {

1864 log_err("FAIL: u_charName failed when it shouldn't\n");

1865 uset_close(set);

1866 uset_close(dumb);

1867 return;

1868 }

1869 if(len>maxLength) {

1870 maxLength=len;

1871 }

1872

1873 for (i=0; i<len; ++i) {

1874 if (!map[(uint8_t) buf[i]]) {

1875 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));

1876 map[(uint8_t) buf[i]] = TRUE;

1877 }

1878 }

1879

1880 /* test for leading/trailing whitespace */

1881 if(buf[0]==' ' \|\| buf[0]=='\t' \|\| buf[len-1]==' ' \|\| buf[len-1]=='\t ') {

1882 log_err("u_charName(U+%04x) returns a name with leading or trail ing whitespace\n", cp);

1883 }

1884 }

1885

1886 if(map[(uint8_t)'\t']) {

1887 log_err("u_charName() returned a name with a TAB for some code point \n", cp);

1888 }

1889

1890 length=uprv_getMaxCharNameLength();

1891 if(length!=maxLength) {

1892 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum len gth %d of all extended names\n",

1893 length, maxLength);

1894 }

1895

1896 /* compare the sets. Where is my uset_equals?!! */

1897 ok=TRUE;

1898 for(i=0; i<256; ++i) {

1899 if(uset_contains(set, i)!=uset_contains(dumb, i)) {

1900 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !use t_contains(dumb, i)) {

1901 /* ignore lowercase a-z that are in set but not in dumb */

1902 ok=TRUE;

1903 } else {

1904 ok=FALSE;

1905 break;

1906 }

1907 }

1908 }

1909

1910 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);

1911 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);

1912 if (U_FAILURE(ec)) {

1913 log_err("FAIL: uset_toPattern failed when it shouldn't\n");

1914 uset_close(set);

1915 uset_close(dumb);

1916 return;

1917 }

1918

1919 if (l1 >= BUFSIZE) {

1920 l1 = BUFSIZE-1;

1921 pat[l1] = 0;

1922 }

1923 if (l2 >= BUFSIZE) {

1924 l2 = BUFSIZE-1;

1925 dumbPat[l2] = 0;

1926 }

1927

1928 if (!ok) {

1929 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",

1930 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));

1931 } else if(getTestOption(VERBOSITY_OPTION)) {

1932 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescst rdup(pat, l1));

1933 }

1934

1935 uset_close(set);

1936 uset_close(dumb);

1937 }

1938

1939 /* ### TODO: test error cases and other interesting things */

1940 }

1941

1942 static void

1943 TestUCharFromNameUnderflow() {

1944 // Ticket #10889: Underflow crash when there is no dash.

1945 UErrorCode errorCode=U_ZERO_ERROR;

1946 UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCod e);

1947 if(U_SUCCESS(errorCode)) {

1948 log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\ n", c, u_errorName(errorCode));

1949 }

1950

1951 // Test related edge cases.

1952 errorCode=U_ZERO_ERROR;

1953 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);

1954 if(U_SUCCESS(errorCode)) {

1955 log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_ errorName(errorCode));

1956 }

1957

1958 errorCode=U_ZERO_ERROR;

1959 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);

1960 if(U_SUCCESS(errorCode)) {

1961 log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));

1962 }

1963

1964 errorCode=U_ZERO_ERROR;

1965 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);

1966 if(U_SUCCESS(errorCode)) {

1967 log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\ n", c, u_errorName(errorCode));

1968 }

1969 }

1970

1971 /* test u_isMirrored() and u_charMirror() ----------------------------------- */

1972

1973 static void

1974 TestMirroring() {

1975 USet *set;

1976 UErrorCode errorCode;

1977

1978 UChar32 start, end, c2, c3;

1979 int32_t i;

1980

1981 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);

1982

1983 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);

1984

1985 log_verbose("Testing u_isMirrored()\n");

1986 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_i sMirrored(0x232a) &&

1987 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && ! u_isMirrored(0x3400)

1988 )

1989 ) {

1990 log_err("u_isMirrored() does not work correctly\n");

1991 }

1992

1993 log_verbose("Testing u_charMirror()\n");

1994 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x 208d)==0x208e && u_charMirror(0x3017)==0x3016 &&

1995 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirro r(0x29F5)==0x2215 && /* large delta between the code points */

1996 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror( 0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&

1997 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrig endum6.html */

1998 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charM irror(0x301d)==0x301d

1999 )

2000 ) {

2001 log_err("u_charMirror() does not work correctly\n");

2002 }

2003

2004 /* verify that Bidi_Mirroring_Glyph roundtrips */

2005 errorCode=U_ZERO_ERROR;

2006 set=uset_openPattern(mirroredPattern, 17, &errorCode);

2007

2008 if (U_FAILURE(errorCode)) {

2009 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\ n");

2010 } else {

2011 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i ) {

2012 do {

2013 c2=u_charMirror(start);

2014 c3=u_charMirror(c2);

2015 if(c3!=start) {

2016 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx ->U+%04lx\n", (long)start, (long)c2, (long)c3);

2017 }

2018 c3=u_getBidiPairedBracket(start);

2019 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)= =U_BPT_NONE) {

2020 if(c3!=start) {

2021 log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt (c)==None\n",

2022 (long)start);

2023 }

2024 } else {

2025 if(c3!=c2) {

2026 log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bm g(c)'\n",

2027 (long)start, (long)c2);

2028 }

2029 }

2030 } while(++start<=end);

2031 }

2032 }

2033

2034 uset_close(set);

2035 }

2036

2037

2038 struct RunTestData

2039 {

2040 const char *runText;

2041 UScriptCode runCode;

2042 };

2043

2044 typedef struct RunTestData RunTestData;

2045

2046 static void

2047 CheckScriptRuns(UScriptRun scriptRun, int32_t runStarts, const RunTestData *te stData, int32_t nRuns,

2048 const char *prefix)

2049 {

2050 int32_t run, runStart, runLimit;

2051 UScriptCode runCode;

2052

2053 /* iterate over all the runs */

2054 run = 0;

2055 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {

2056 if (runStart != runStarts[run]) {

2057 log_err("%s: incorrect start offset for run %d: expected %d, got %d\ n",

2058 prefix, run, runStarts[run], runStart);

2059 }

2060

2061 if (runLimit != runStarts[run + 1]) {

2062 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\ n",

2063 prefix, run, runStarts[run + 1], runLimit);

2064 }

2065

2066 if (runCode != testData[run].runCode) {

2067 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\ "\n",

2068 prefix, run, uscript_getName(testData[run].runCode), uscript_get Name(runCode));

2069 }

2070

2071 run += 1;

2072

2073 /* stop when we've seen all the runs we expect to see */

2074 if (run >= nRuns) {

2075 break;

2076 }

2077 }

2078

2079 /* Complain if we didn't see then number of runs we expected */

2080 if (run != nRuns) {

2081 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, r un, nRuns);

2082 }

2083 }

2084

2085 static void

2086 TestUScriptRunAPI()

2087 {

2088 static const RunTestData testData1[] = {

2089 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCR IPT_DEVANAGARI},

2090 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARA BIC},

2091 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYR ILLIC},

2092 {"English (", USCRIPT_LATIN},

2093 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},

2094 {") ", USCRIPT_LATIN},

2095 {"\\u6F22\\u5B75", USCRIPT_HAN},

2096 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},

2097 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},

2098 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}

2099 };

2100

2101 static const RunTestData testData2[] = {

2102 {"((((((((((abc))))))))))", USCRIPT_LATIN}

2103 };

2104

2105 static const struct {

2106 const RunTestData *testData;

2107 int32_t nRuns;

2108 } testDataEntries[] = {

2109 {testData1, UPRV_LENGTHOF(testData1)},

2110 {testData2, UPRV_LENGTHOF(testData2)}

2111 };

2112

2113 static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);

2114 int32_t testEntry;

2115

2116 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {

2117 UChar testString[1024];

2118 int32_t runStarts[256];

2119 int32_t nTestRuns = testDataEntries[testEntry].nRuns;

2120 const RunTestData *testData = testDataEntries[testEntry].testData;

2121

2122 int32_t run, stringLimit;

2123 UScriptRun *scriptRun = NULL;

2124 UErrorCode err;

2125

2126 /*

2127 * Fill in the test string and the runStarts array.

2128 */

2129 stringLimit = 0;

2130 for (run = 0; run < nTestRuns; run += 1) {

2131 runStarts[run] = stringLimit;

2132 stringLimit += u_unescape(testData[run].runText, &testString[stringL imit], 1024 - stringLimit);

2133 /stringLimit -= 1;/

2134 }

2135

2136 /* The limit of the last run */

2137 runStarts[nTestRuns] = stringLimit;

2138

2139 /*

2140 * Make sure that calling uscript_OpenRun with a NULL text pointer

2141 * and a non-zero text length returns the correct error.

2142 */

2143 err = U_ZERO_ERROR;

2144 scriptRun = uscript_openRun(NULL, stringLimit, &err);

2145

2146 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

2147 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instea d of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

2148 }

2149

2150 if (scriptRun != NULL) {

2151 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NUL L result.\n");

2152 uscript_closeRun(scriptRun);

2153 }

2154

2155 /*

2156 * Make sure that calling uscript_OpenRun with a non-NULL text pointer

2157 * and a zero text length returns the correct error.

2158 */

2159 err = U_ZERO_ERROR;

2160 scriptRun = uscript_openRun(testString, 0, &err);

2161

2162 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

2163 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

2164 }

2165

2166 if (scriptRun != NULL) {

2167 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL re sult.\n");

2168 uscript_closeRun(scriptRun);

2169 }

2170

2171 /*

2172 * Make sure that calling uscript_openRun with a NULL text pointer

2173 * and a zero text length doesn't return an error.

2174 */

2175 err = U_ZERO_ERROR;

2176 scriptRun = uscript_openRun(NULL, 0, &err);

2177

2178 if (U_FAILURE(err)) {

2179 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_erro rName(err));

2180 }

2181

2182 /* Make sure that the empty iterator doesn't find any runs */

2183 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {

2184 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n ");

2185 }

2186

2187 /*

2188 * Make sure that calling uscript_setRunText with a NULL text pointer

2189 * and a non-zero text length returns the correct error.

2190 */

2191 err = U_ZERO_ERROR;

2192 uscript_setRunText(scriptRun, NULL, stringLimit, &err);

2193

2194 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

2195 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) retu rned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

2196 }

2197

2198 /*

2199 * Make sure that calling uscript_OpenRun with a non-NULL text pointer

2200 * and a zero text length returns the correct error.

2201 */

2202 err = U_ZERO_ERROR;

2203 uscript_setRunText(scriptRun, testString, 0, &err);

2204

2205 if (err != U_ILLEGAL_ARGUMENT_ERROR) {

2206 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));

2207 }

2208

2209 /*

2210 * Now call uscript_setRunText on the empty iterator

2211 * and make sure that it works.

2212 */

2213 err = U_ZERO_ERROR;

2214 uscript_setRunText(scriptRun, testString, stringLimit, &err);

2215

2216 if (U_FAILURE(err)) {

2217 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(e rr));

2218 } else {

2219 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_ setRunText");

2220 }

2221

2222 uscript_closeRun(scriptRun);

2223

2224 /*

2225 * Now open an interator over the testString

2226 * using uscript_openRun and make sure that it works

2227 */

2228 scriptRun = uscript_openRun(testString, stringLimit, &err);

2229

2230 if (U_FAILURE(err)) {

2231 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err) );

2232 } else {

2233 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_ openRun");

2234 }

2235

2236 /* Now reset the iterator, and make sure

2237 * that it still works.

2238 */

2239 uscript_resetRun(scriptRun);

2240

2241 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_rese tRun");

2242

2243 /* Close the iterator */

2244 uscript_closeRun(scriptRun);

2245 }

2246 }

2247

2248 /* test additional, non-core properties */

2249 static void

2250 TestAdditionalProperties() {

2251 /* test data for u_charAge() */

2252 static const struct {

2253 UChar32 c;

2254 UVersionInfo version;

2255 } charAges[]={

2256 {0x41, { 1, 1, 0, 0 }},

2257 {0xffff, { 1, 1, 0, 0 }},

2258 {0x20ab, { 2, 0, 0, 0 }},

2259 {0x2fffe, { 2, 0, 0, 0 }},

2260 {0x20ac, { 2, 1, 0, 0 }},

2261 {0xfb1d, { 3, 0, 0, 0 }},

2262 {0x3f4, { 3, 1, 0, 0 }},

2263 {0x10300, { 3, 1, 0, 0 }},

2264 {0x220, { 3, 2, 0, 0 }},

2265 {0xff60, { 3, 2, 0, 0 }}

2266 };

2267

2268 /* test data for u_hasBinaryProperty() */

2269 static const int32_t

2270 props[][3]={ /* code point, property, value */

2271 { 0x0627, UCHAR_ALPHABETIC, TRUE },

2272 { 0x1034a, UCHAR_ALPHABETIC, TRUE },

2273 { 0x2028, UCHAR_ALPHABETIC, FALSE },

2274

2275 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },

2276 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },

2277

2278 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },

2279 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },

2280

2281 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },

2282 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },

2283

2284 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrige ndum6.html */

2285 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },

2286 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },

2287 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },

2288 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },

2289

2290 { 0x058a, UCHAR_DASH, TRUE },

2291 { 0x007e, UCHAR_DASH, FALSE },

2292

2293 { 0x0c4d, UCHAR_DIACRITIC, TRUE },

2294 { 0x3000, UCHAR_DIACRITIC, FALSE },

2295

2296 { 0x0e46, UCHAR_EXTENDER, TRUE },

2297 { 0x0020, UCHAR_EXTENDER, FALSE },

2298

2299 #if !UCONFIG_NO_NORMALIZATION

2300 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },

2301 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },

2302 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },

2303

2304 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */

2305 { 0x0308, UCHAR_NFD_INERT, FALSE },

2306

2307 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */

2308 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */

2309

2310 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */

2311 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */

2312 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */

2313 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */

2314 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */

2315 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */

2316

2317 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */

2318 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */

2319

2320 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },

2321 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },

2322 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */

2323 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */

2324 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */

2325 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */

2326 #endif

2327

2328 { 0x0044, UCHAR_HEX_DIGIT, TRUE },

2329 { 0xff46, UCHAR_HEX_DIGIT, TRUE },

2330 { 0x0047, UCHAR_HEX_DIGIT, FALSE },

2331

2332 { 0x30fb, UCHAR_HYPHEN, TRUE },

2333 { 0xfe58, UCHAR_HYPHEN, FALSE },

2334

2335 { 0x2172, UCHAR_ID_CONTINUE, TRUE },

2336 { 0x0307, UCHAR_ID_CONTINUE, TRUE },

2337 { 0x005c, UCHAR_ID_CONTINUE, FALSE },

2338

2339 { 0x2172, UCHAR_ID_START, TRUE },

2340 { 0x007a, UCHAR_ID_START, TRUE },

2341 { 0x0039, UCHAR_ID_START, FALSE },

2342

2343 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },

2344 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },

2345 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },

2346

2347 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },

2348 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },

2349

2350 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },

2351 { 0x0345, UCHAR_LOWERCASE, TRUE },

2352 { 0x0030, UCHAR_LOWERCASE, FALSE },

2353

2354 { 0x1d7a9, UCHAR_MATH, TRUE },

2355 { 0x2135, UCHAR_MATH, TRUE },

2356 { 0x0062, UCHAR_MATH, FALSE },

2357

2358 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },

2359 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },

2360 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },

2361

2362 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },

2363 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },

2364 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },

2365

2366 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },

2367 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },

2368

2369 { 0x1d44a, UCHAR_UPPERCASE, TRUE },

2370 { 0x2162, UCHAR_UPPERCASE, TRUE },

2371 { 0x0345, UCHAR_UPPERCASE, FALSE },

2372

2373 { 0x0020, UCHAR_WHITE_SPACE, TRUE },

2374 { 0x202f, UCHAR_WHITE_SPACE, TRUE },

2375 { 0x3001, UCHAR_WHITE_SPACE, FALSE },

2376

2377 { 0x0711, UCHAR_XID_CONTINUE, TRUE },

2378 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },

2379 { 0x007c, UCHAR_XID_CONTINUE, FALSE },

2380

2381 { 0x16ee, UCHAR_XID_START, TRUE },

2382 { 0x23456, UCHAR_XID_START, TRUE },

2383 { 0x1d1aa, UCHAR_XID_START, FALSE },

2384

2385 /*

2386 * Version break:

2387 * The following properties are only supported starting with the

2388 * Unicode version indicated in the second field.

2389 */

2390 { -1, 0x320, 0 },

2391

2392 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },

2393 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },

2394 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },

2395

2396 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */

2397 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */

2398 { 0xe0001, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */

2399 { 0xe0100, UCHAR_DEPRECATED, FALSE },

2400

2401 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },

2402 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },

2403 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },

2404 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */

2405

2406 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },

2407 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },

2408 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */

2409 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },

2410

2411 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },

2412 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },

2413

2414 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },

2415 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },

2416

2417 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },

2418 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },

2419

2420 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },

2421 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },

2422

2423 { 0x2e9b, UCHAR_RADICAL, TRUE },

2424 { 0x4e00, UCHAR_RADICAL, FALSE },

2425

2426 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },

2427 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },

2428

2429 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },

2430 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },

2431

2432 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */

2433

2434 { 0x002e, UCHAR_S_TERM, TRUE },

2435 { 0x0061, UCHAR_S_TERM, FALSE },

2436

2437 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },

2438 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },

2439 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },

2440 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },

2441

2442 /* enum/integer type properties */

2443

2444 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */

2445 /* test default Bidi classes for unassigned code points */

2446 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2447 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2448 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2449 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Uni code 5.0 */

2450 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */

2451 { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2452 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2453 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2454 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2455 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2456 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2457

2458 { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2459 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2460 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2461 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2462 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2463 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2464 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2465

2466 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },

2467 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },

2468 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },

2469 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },

2470 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },

2471 { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

2472 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },

2473 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },

2474 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

2475 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },

2476 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },

2477

2478 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in Tes tUnicodeData() */

2479 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },

2480

2481 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },

2482 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },

2483 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },

2484 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

2485 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

2486 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },

2487 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },

2488 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },

2489 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },

2490

2491 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

2492 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },

2493 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

2494 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },

2495 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2496 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },

2497 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2498 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2499 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2500 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

2501 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

2502 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2503 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },

2504 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */

2505 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },

2506 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

2507 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },

2508

2509 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeD ata() */

2510 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },

2511 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Un icode 5.2 */

2512

2513 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },

2514 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },

2515 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },

2516 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },

2517 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },

2518

2519 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },

2520 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },

2521 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },

2522 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },

2523 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },

2524 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

2525 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

2526 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },

2527

2528 /* TestUnicodeData() verifies that no assigned character has "XX" (unkno wn) */

2529 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },

2530 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },

2531 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },

2532 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },

2533 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

2534 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

2535 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

2536 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },

2537 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },

2538 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },

2539 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },

2540 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },

2541 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },

2542 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },

2543 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },

2544

2545 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */

2546

2547 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */

2548

2549 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2550 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

2551 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

2552 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

2553 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

2554 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

2555 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },

2556

2557 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2558 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

2559 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* chang ed in Unicode 5.2 */

2560 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2561

2562 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

2563 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

2564 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

2565 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },

2566 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

2567 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

2568

2569 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2570 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

2571 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* chang ed in Unicode 5.2 */

2572 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2573

2574 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

2575 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

2576 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

2577 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },

2578 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

2579 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

2580 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2581

2582 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2583 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

2584 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* chang ed in Unicode 5.2 */

2585 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2586

2587 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

2588 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

2589 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

2590 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },

2591

2592 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

2593 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

2594 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

2595 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

2596 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },

2597

2598 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },

2599

2600 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */

2601

2602 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },

2603 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },

2604 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },

2605

2606 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },

2607 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },

2608 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },

2609 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },

2610 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },

2611

2612 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },

2613 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC },

2614 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },

2615

2616 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE },

2617 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC },

2618 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },

2619 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },

2620

2621 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },

2622 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },

2623 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },

2624 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },

2625 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },

2626 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },

2627

2628 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },

2629 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },

2630 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },

2631 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },

2632

2633 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },

2634 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },

2635 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },

2636 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },

2637

2638 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },

2639 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },

2640 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },

2641 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },

2642

2643 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */

2644

2645 /* unassigned code points in new default Bidi R blocks */

2646 { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2647 { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

2648

2649 /* test some script codes >127 */

2650 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM },

2651 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },

2652 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },

2653

2654 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */

2655

2656 /* value changed in Unicode 6.0 */

2657 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },

2658

2659 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */

2660

2661 /* unassigned code points in new/changed default Bidi AL blocks */

2662 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2663 { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },

2664

2665 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */

2666

2667 /* unassigned code points in the currency symbols block now default to E T */

2668 { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },

2669 { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },

2670

2671 /* new property in Unicode 6.3 */

2672 { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },

2673 { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },

2674 { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },

2675 { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },

2676 { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },

2677 { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },

2678

2679 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */

2680

2681 /* new character range with Joining_Group values */

2682 { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },

2683 { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },

2684 { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },

2685 { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },

2686 { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },

2687

2688 /* undefined UProperty values */

2689 { 0x61, 0x4a7, 0 },

2690 { 0x234bc, 0x15ed, 0 }

2691 };

2692

2693 UVersionInfo version;

2694 UChar32 c;

2695 int32_t i, result, uVersion;

2696 UProperty which;

2697

2698 /* what is our Unicode version? */

2699 u_getUnicodeVersion(version);

2700 uVersion=((int32_t)version[0]<<8)\|(version[1]<<4)\|version[2]; /* major/minor /update version numbers */

2701

2702 u_charAge(0x20, version);

2703 if(version[0]==0) {

2704 /* no additional properties available */

2705 log_err("TestAdditionalProperties: no additional properties available, n ot tested\n");

2706 return;

2707 }

2708

2709 /* test u_charAge() */

2710 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {

2711 u_charAge(charAges[i].c, version);

2712 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {

2713 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { % u, %u, %u, %u }\n",

2714 charAges[i].c,

2715 version[0], version[1], version[2], version[3],

2716 charAges[i].version[0], charAges[i].version[1], charAges[i].vers ion[2], charAges[i].version[3]);

2717 }

2718 }

2719

2720 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 \|\|

2721 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 \|\|

2722 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 \|\| /* j2478 */

2723 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 \|\| /JB#2410/

2724 u_getIntPropertyMinValue(0x2345)!=0

2725 ) {

2726 log_err("error: u_getIntPropertyMinValue() wrong\n");

2727 }

2728 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {

2729 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");

2730 }

2731 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {

2732 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");

2733 }

2734 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {

2735 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n") ;

2736 }

2737 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_CO UNT-1 ) {

2738 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");

2739 }

2740 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {

2741 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");

2742 }

2743 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {

2744 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");

2745 }

2746 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {

2747 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");

2748 }

2749 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {

2750 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");

2751 }

2752 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGOR Y_COUNT-1) {

2753 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n ");

2754 }

2755 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUN T-1) {

2756 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wro ng\n");

2757 }

2758 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_CO UNT-1) {

2759 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) w rong\n");

2760 }

2761 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {

2762 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n") ;

2763 }

2764 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {

2765 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");

2766 }

2767 if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_ COUNT-1) {

2768 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");

2769 }

2770 /JB#2410/

2771 if( u_getIntPropertyMaxValue(0x2345)!=-1) {

2772 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");

2773 }

2774 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_CO UNT - 1)) {

2775 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong \n");

2776 }

2777 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) {

2778 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");

2779 }

2780 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1 )) {

2781 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");

2782 }

2783 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUN T -1)) {

2784 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n ");

2785 }

2786

2787 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */

2788 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {

2789 const char *whichName;

2790

2791 if(props[i][0]<0) {

2792 /* Unicode version break */

2793 if(uVersion<props[i][1]) {

2794 break; /* do not test properties that are not yet supported */

2795 } else {

2796 continue; /* skip this row */

2797 }

2798 }

2799

2800 c=(UChar32)props[i][0];

2801 which=(UProperty)props[i][1];

2802 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);

2803

2804 if(which<UCHAR_INT_START) {

2805 result=u_hasBinaryProperty(c, which);

2806 if(result!=props[i][2]) {

2807 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wron g (props[%d]) - (Are you missing data?)\n",

2808 c, whichName, result, i);

2809 }

2810 }

2811

2812 result=u_getIntPropertyValue(c, which);

2813 if(result!=props[i][2]) {

2814 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",

2815 c, whichName, result, props[i][2], i);

2816 }

2817

2818 /* test separate functions, too */

2819 switch((UProperty)props[i][1]) {

2820 case UCHAR_ALPHABETIC:

2821 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {

2822 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d]) \n",

2823 props[i][0], result, i);

2824 }

2825 break;

2826 case UCHAR_LOWERCASE:

2827 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {

2828 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\ n",

2829 props[i][0], result, i);

2830 }

2831 break;

2832 case UCHAR_UPPERCASE:

2833 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {

2834 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\ n",

2835 props[i][0], result, i);

2836 }

2837 break;

2838 case UCHAR_WHITE_SPACE:

2839 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {

2840 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d]) \n",

2841 props[i][0], result, i);

2842 }

2843 break;

2844 default:

2845 break;

2846 }

2847 }

2848 }

2849

2850 static void

2851 TestNumericProperties(void) {

2852 /* see UnicodeData.txt, DerivedNumericValues.txt */

2853 static const struct {

2854 UChar32 c;

2855 int32_t type;

2856 double numValue;

2857 } values[]={

2858 { 0x0F33, U_NT_NUMERIC, -1./2. },

2859 { 0x0C66, U_NT_DECIMAL, 0 },

2860 { 0x96f6, U_NT_NUMERIC, 0 },

2861 { 0xa833, U_NT_NUMERIC, 1./16. },

2862 { 0x2152, U_NT_NUMERIC, 1./10. },

2863 { 0x2151, U_NT_NUMERIC, 1./9. },

2864 { 0x1245f, U_NT_NUMERIC, 1./8. },

2865 { 0x2150, U_NT_NUMERIC, 1./7. },

2866 { 0x2159, U_NT_NUMERIC, 1./6. },

2867 { 0x09f6, U_NT_NUMERIC, 3./16. },

2868 { 0x2155, U_NT_NUMERIC, 1./5. },

2869 { 0x00BD, U_NT_NUMERIC, 1./2. },

2870 { 0x0031, U_NT_DECIMAL, 1. },

2871 { 0x4e00, U_NT_NUMERIC, 1. },

2872 { 0x58f1, U_NT_NUMERIC, 1. },

2873 { 0x10320, U_NT_NUMERIC, 1. },

2874 { 0x0F2B, U_NT_NUMERIC, 3./2. },

2875 { 0x00B2, U_NT_DIGIT, 2. },

2876 { 0x5f10, U_NT_NUMERIC, 2. },

2877 { 0x1813, U_NT_DECIMAL, 3. },

2878 { 0x5f0e, U_NT_NUMERIC, 3. },

2879 { 0x2173, U_NT_NUMERIC, 4. },

2880 { 0x8086, U_NT_NUMERIC, 4. },

2881 { 0x278E, U_NT_DIGIT, 5. },

2882 { 0x1D7F2, U_NT_DECIMAL, 6. },

2883 { 0x247A, U_NT_DIGIT, 7. },

2884 { 0x7396, U_NT_NUMERIC, 9. },

2885 { 0x1372, U_NT_NUMERIC, 10. },

2886 { 0x216B, U_NT_NUMERIC, 12. },

2887 { 0x16EE, U_NT_NUMERIC, 17. },

2888 { 0x249A, U_NT_NUMERIC, 19. },

2889 { 0x303A, U_NT_NUMERIC, 30. },

2890 { 0x5345, U_NT_NUMERIC, 30. },

2891 { 0x32B2, U_NT_NUMERIC, 37. },

2892 { 0x1375, U_NT_NUMERIC, 40. },

2893 { 0x10323, U_NT_NUMERIC, 50. },

2894 { 0x0BF1, U_NT_NUMERIC, 100. },

2895 { 0x964c, U_NT_NUMERIC, 100. },

2896 { 0x217E, U_NT_NUMERIC, 500. },

2897 { 0x2180, U_NT_NUMERIC, 1000. },

2898 { 0x4edf, U_NT_NUMERIC, 1000. },

2899 { 0x2181, U_NT_NUMERIC, 5000. },

2900 { 0x137C, U_NT_NUMERIC, 10000. },

2901 { 0x4e07, U_NT_NUMERIC, 10000. },

2902 { 0x12432, U_NT_NUMERIC, 216000. },

2903 { 0x12433, U_NT_NUMERIC, 432000. },

2904 { 0x4ebf, U_NT_NUMERIC, 100000000. },

2905 { 0x5146, U_NT_NUMERIC, 1000000000000. },

2906 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },

2907 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },

2908 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },

2909 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },

2910 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },

2911 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },

2912 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },

2913 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }

2914 };

2915

2916 double nv;

2917 UChar32 c;

2918 int32_t i, type;

2919

2920 for(i=0; i<UPRV_LENGTHOF(values); ++i) {

2921 c=values[i].c;

2922 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);

2923 nv=u_getNumericValue(c);

2924

2925 if(type!=values[i].type) {

2926 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, va lues[i].type);

2927 }

2928 if(0.000001 <= fabs(nv - values[i].numValue)) {

2929 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, value s[i].numValue);

2930 }

2931 }

2932 }

2933

2934 /**

2935 * Test the property names and property value names API.

2936 */

2937 static void

2938 TestPropertyNames(void) {

2939 int32_t p, v, choice=0, rev;

2940 UBool atLeastSomething = FALSE;

2941

2942 for (p=0; ; ++p) {

2943 UProperty propEnum = (UProperty)p;

2944 UBool sawProp = FALSE;

2945 if(p > 10 && !atLeastSomething) {

2946 log_data_err("Never got anything after 10 tries.\nYour data is probabl y fried. Quitting this test\n", p, choice);

2947 return;

2948 }

2949

2950 for (choice=0; ; ++choice) {

2951 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice) choice);

2952 if (name) {

2953 if (!sawProp)

2954 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);

2955 log_verbose("%d=\"%s\"", choice, name);

2956 sawProp = TRUE;

2957 atLeastSomething = TRUE;

2958

2959 /* test reverse mapping */

2960 rev = u_getPropertyEnum(name);

2961 if (rev != p) {

2962 log_err("Property round-trip failure: %d -> %s -> %d\n",

2963 p, name, rev);

2964 }

2965 }

2966 if (!name && choice>0) break;

2967 }

2968 if (sawProp) {

2969 /* looks like a valid property; check the values */

2970 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME );

2971 int32_t max = 0;

2972 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {

2973 max = 255;

2974 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {

2975 /* it's far too slow to iterate all the way up to

2976 the real max, U_GC_P_MASK */

2977 max = U_GC_NL_MASK;

2978 } else if (p == UCHAR_BLOCK) {

2979 /* UBlockCodes, unlike other values, start at 1 */

2980 max = 1;

2981 }

2982 log_verbose("\n");

2983 for (v=-1; ; ++v) {

2984 UBool sawValue = FALSE;

2985 for (choice=0; ; ++choice) {

2986 const char* vname = u_getPropertyValueName(propEnum, v, (UPr opertyNameChoice)choice);

2987 if (vname) {

2988 if (!sawValue) log_verbose(" %s, value %d:", pname, v);

2989 log_verbose("%d=\"%s\"", choice, vname);

2990 sawValue = TRUE;

2991

2992 /* test reverse mapping */

2993 rev = u_getPropertyValueEnum(propEnum, vname);

2994 if (rev != v) {

2995 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",

2996 pname, v, vname, rev);

2997 }

2998 }

2999 if (!vname && choice>0) break;

3000 }

3001 if (sawValue) {

3002 log_verbose("\n");

3003 }

3004 if (!sawValue && v>=max) break;

3005 }

3006 }

3007 if (!sawProp) {

3008 if (p>=UCHAR_STRING_LIMIT) {

3009 break;

3010 } else if (p>=UCHAR_DOUBLE_LIMIT) {

3011 p = UCHAR_STRING_START - 1;

3012 } else if (p>=UCHAR_MASK_LIMIT) {

3013 p = UCHAR_DOUBLE_START - 1;

3014 } else if (p>=UCHAR_INT_LIMIT) {

3015 p = UCHAR_MASK_START - 1;

3016 } else if (p>=UCHAR_BINARY_LIMIT) {

3017 p = UCHAR_INT_START - 1;

3018 }

3019 }

3020 }

3021 }

3022

3023 /**

3024 * Test the property values API. See JB#2410.

3025 */

3026 static void

3027 TestPropertyValues(void) {

3028 int32_t i, p, min, max;

3029 UErrorCode ec;

3030

3031 /* Min should be 0 for everything. */

3032 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */

3033 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {

3034 UProperty propEnum = (UProperty)p;

3035 min = u_getIntPropertyMinValue(propEnum);

3036 if (min != 0) {

3037 if (p == UCHAR_BLOCK) {

3038 /* This is okay...for now. See JB#2487.

3039 TODO Update this for JB#2487. */

3040 } else {

3041 const char* name;

3042 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);

3043 if (name == NULL)

3044 name = "<ERROR>";

3045 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",

3046 name, min);

3047 }

3048 }

3049 }

3050

3051 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 \|\|

3052 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {

3053 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK ) is wrong\n");

3054 }

3055

3056 /* Max should be -1 for invalid properties. */

3057 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);

3058 if (max != -1) {

3059 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",

3060 max);

3061 }

3062

3063 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */

3064 for (i=0; i<2; ++i) {

3065 int32_t script;

3066 const char* desc;

3067 ec = U_ZERO_ERROR;

3068 switch (i) {

3069 case 0:

3070 script = uscript_getScript(-1, &ec);

3071 desc = "uscript_getScript(-1)";

3072 break;

3073 case 1:

3074 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);

3075 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";

3076 break;

3077 default:

3078 log_err("Internal test error. Too many scripts\n");

3079 return;

3080 }

3081 /* We don't explicitly test ec. It should be U_FAILURE but it

3082 isn't documented as such. */

3083 if (script != (int32_t)USCRIPT_INVALID_CODE) {

3084 log_err("FAIL: %s = %d, exp. 0\n",

3085 desc, script);

3086 }

3087 }

3088 }

3089

3090 /* various tests for consistency of UCD data and API behavior */

3091 static void

3092 TestConsistency() {

3093 char buffer[300];

3094 USet set1, set2, set3, set4;

3095 UErrorCode errorCode;

3096

3097 UChar32 start, end;

3098 int32_t i, length;

3099

3100 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);

3101 U_STRING_DECL(dashPattern, "[:Dash:]", 8);

3102 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);

3103 U_STRING_DECL(formatPattern, "[:Cf:]", 6);

3104 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);

3105

3106 U_STRING_DECL(mathBlocksPattern,

3107 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe matical Operators:][:block=Mathematical Alphanumeric Symbols:]]",

3108 214);

3109 U_STRING_DECL(mathPattern, "[:Math:]", 8);

3110 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);

3111 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);

3112 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);

3113

3114 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);

3115 U_STRING_INIT(dashPattern, "[:Dash:]", 8);

3116 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);

3117 U_STRING_INIT(formatPattern, "[:Cf:]", 6);

3118 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);

3119

3120 U_STRING_INIT(mathBlocksPattern,

3121 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Sym bols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathe matical Operators:][:block=Mathematical Alphanumeric Symbols:]]",

3122 214);

3123 U_STRING_INIT(mathPattern, "[:Math:]", 8);

3124 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);

3125 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);

3126 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);

3127

3128 /*

3129 * It used to be that UCD.html and its precursors said

3130 * "Those dashes used to mark connections between pieces of words,

3131 * plus the Katakana middle dot."

3132 *

3133 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash

3134 * but not from Hyphen.

3135 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.

3136 * Therefore, do not show errors when testing the Hyphen property.

3137 */

3138 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"

3139 "known to the UTC and not considered errors.\n");

3140

3141 errorCode=U_ZERO_ERROR;

3142 set1=uset_openPattern(hyphenPattern, 10, &errorCode);

3143 set2=uset_openPattern(dashPattern, 8, &errorCode);

3144 if(U_SUCCESS(errorCode)) {

3145 /* remove the Katakana middle dot(s) from set1 */

3146 uset_remove(set1, 0x30fb);

3147 uset_remove(set1, 0xff65); /* halfwidth variant */

3148 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);

3149 } else {

3150 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));

3151 }

3152

3153 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */

3154 set3=uset_openPattern(formatPattern, 6, &errorCode);

3155 set4=uset_openPattern(alphaPattern, 14, &errorCode);

3156 if(U_SUCCESS(errorCode)) {

3157 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);

3158 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);

3159 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);

3160 } else {

3161 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missi ng data?)\n", u_errorName(errorCode));

3162 }

3163

3164 uset_close(set1);

3165 uset_close(set2);

3166 uset_close(set3);

3167 uset_close(set4);

3168

3169 /*

3170 * Check that each lowercase character has "small" in its name

3171 * and not "capital".

3172 * There are some such characters, some of which seem odd.

3173 * Use the verbose flag to see these notices.

3174 */

3175 errorCode=U_ZERO_ERROR;

3176 set1=uset_openPattern(lowerPattern, 13, &errorCode);

3177 if(U_SUCCESS(errorCode)) {

3178 for(i=0;; ++i) {

3179 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);

3180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {

3181 break; /* done */

3182 }

3183 if(U_FAILURE(errorCode)) {

3184 log_err("error iterating over [:Lowercase:] at item %d: %s\n",

3185 i, u_errorName(errorCode));

3186 break;

3187 }

3188 if(length!=0) {

3189 break; /* done with code points, got a string or -1 */

3190 }

3191

3192 while(start<=end) {

3193 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buf fer), &errorCode);

3194 if(U_FAILURE(errorCode)) {

3195 log_data_err("error getting the name of U+%04x - %s\n", star t, u_errorName(errorCode));

3196 errorCode=U_ZERO_ERROR;

3197 }

3198 if( (strstr(buffer, "SMALL")==NULL \|\| strstr(buffer, "CAPITAL")! =NULL) &&

3199 strstr(buffer, "SMALL CAPITAL")==NULL

3200 ) {

3201 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);

3202 }

3203 ++start;

3204 }

3205 }

3206 } else {

3207 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n ", u_errorName(errorCode));

3208 }

3209 uset_close(set1);

3210

3211 /* verify that all assigned characters in Math blocks are exactly Math chara cters */

3212 errorCode=U_ZERO_ERROR;

3213 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);

3214 set2=uset_openPattern(mathPattern, 8, &errorCode);

3215 set3=uset_openPattern(unassignedPattern, 6, &errorCode);

3216 if(U_SUCCESS(errorCode)) {

3217 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */

3218 uset_complement(set3); /* assigned characters */

3219 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */

3220 compareUSets(set1, set2,

3221 "[assigned Math block chars]", "[math blocks]&[:Math:]",

3222 TRUE);

3223 } else {

3224 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Ar e you missing data?)\n", u_errorName(errorCode));

3225 }

3226 uset_close(set1);

3227 uset_close(set2);

3228 uset_close(set3);

3229

3230 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */

3231 errorCode=U_ZERO_ERROR;

3232 set1=uset_openPattern(unknownPattern, 14, &errorCode);

3233 set2=uset_openPattern(reservedPattern, 20, &errorCode);

3234 if(U_SUCCESS(errorCode)) {

3235 compareUSets(set1, set2,

3236 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",

3237 TRUE);

3238 } else {

3239 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));

3240 }

3241 uset_close(set1);

3242 uset_close(set2);

3243 }

3244

/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
#define HARDCODED_DATA_4497 1

/*
 * API coverage for ucase.c.
 * The whole body is compiled out while HARDCODED_DATA_4497 is nonzero.
 */
static void TestUCase(void) {
#if !HARDCODED_DATA_4497
    UDataMemory *pData;
    UCaseProps *csp;
    const UCaseProps *ccsp;
    UErrorCode errorCode;

    /* coverage for ucase_openBinary() */
    errorCode=U_ZERO_ERROR;
    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
                     u_errorName(errorCode));
        return;
    }

    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
                u_errorName(errorCode));
        udata_close(pData);
        return;
    }

    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
    }

    ucase_close(csp);
    udata_close(pData);

    /* coverage for ucase_getDummy() */
    errorCode=U_ZERO_ERROR;
    ccsp=ucase_getDummy(&errorCode);
    if(U_FAILURE(errorCode)) {
        /* do not use the dummy properties if they could not be created */
        log_err("ucase_getDummy() failed: %s\n", u_errorName(errorCode));
        return;
    }
    if(ucase_tolower(ccsp, 0x41)!=0x41) {
        log_err("ucase_tolower(dummy, A)!=A\n");
    }
#endif
}

3297

3298 /* API coverage for ubidi_props.c */

3299 static void TestUBiDiProps() {

3300 #if !HARDCODED_DATA_4497

3301 UDataMemory *pData;

3302 UBiDiProps *bdp;

3303 const UBiDiProps *cbdp;

3304 UErrorCode errorCode;

3305

3306 /* coverage for ubidi_openBinary() */

3307 errorCode=U_ZERO_ERROR;

3308 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);

3309 if(U_FAILURE(errorCode)) {

3310 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s \n",

3311 u_errorName(errorCode));

3312 return;

3313 }

3314

3315 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);

3316 if(U_FAILURE(errorCode)) {

3317 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",

3318 u_errorName(errorCode));

3319 udata_close(pData);

3320 return;

3321 }

3322

3323 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */

3324 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n ");

3325 }

3326

3327 ubidi_closeProps(bdp);

3328 udata_close(pData);

3329

3330 /* coverage for ubidi_getDummy() */

3331 errorCode=U_ZERO_ERROR;

3332 cbdp=ubidi_getDummy(&errorCode);

3333 if(ubidi_getClass(cbdp, 0x20)!=0) {

3334 log_err("ubidi_getClass(dummy, space)!=0\n");

3335 }

3336 #endif

3337 }

3338

3339 /* test case folding, compare return values with CaseFolding.txt ------------ */

3340

/*
 * Flags naming the kinds of case folding; OR-ed together they record which
 * foldings of a character have already been tested.
 */
enum {
    CF_SIMPLE = 1 << 0,
    CF_FULL   = 1 << 1,
    CF_TURKIC = 1 << 2,
    CF_ALL    = CF_SIMPLE | CF_FULL | CF_TURKIC
};

3348

3349 static void

3350 testFold(UChar32 c, int which,

3351 UChar32 simple, UChar32 turkic,

3352 const UChar *full, int32_t fullLength,

3353 const UChar *turkicFull, int32_t turkicFullLength) {

3354 UChar s[2], t[32];

3355 UChar32 c2;

3356 int32_t length, length2;

3357

3358 UErrorCode errorCode=U_ZERO_ERROR;

3359

3360 length=0;

3361 U16_APPEND_UNSAFE(s, length, c);

3362

3363 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {

3364 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (l ong)c2, (long)simple);

3365 }

3366 if((which&CF_FULL)!=0) {

3367 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);

3368 if(length2!=fullLength \|\| 0!=u_memcmp(t, full, fullLength)) {

3369 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);

3370 }

3371 }

3372 if((which&CF_TURKIC)!=0) {

3373 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {

3374 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);

3375 }

3376

3377 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUD E_SPECIAL_I, &errorCode);

3378 if(length2!=turkicFullLength \|\| 0!=u_memcmp(t, turkicFull, length2)) {

3379 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", ( long)c);

3380 }

3381 }

3382 }

3383

3384 /* test that c case-folds to itself */

3385 static void

3386 testFoldToSelf(UChar32 c, int which) {

3387 UChar s[2];

3388 int32_t length;

3389

3390 length=0;

3391 U16_APPEND_UNSAFE(s, length, c);

3392 testFold(c, which, c, c, s, length, s, length);

3393 }

3394

3395 struct CaseFoldingData {

3396 USet *notSeen;

3397 UChar32 prev, prevSimple;

3398 UChar prevFull[32];

3399 int32_t prevFullLength;

3400 int which;

3401 };

3402 typedef struct CaseFoldingData CaseFoldingData;

3403

3404 static void U_CALLCONV

3405 caseFoldingLineFn(void *context,

3406 char *fields[][2], int32_t fieldCount,

3407 UErrorCode *pErrorCode) {

3408 CaseFoldingData pData=(CaseFoldingData )context;

3409 char *end;

3410 UChar full[32];

3411 UChar32 c, prev, simple;

3412 int32_t count;

3413 int which;

3414 char status;

3415

3416 /* get code point */

3417 const char *s=u_skipWhitespace(fields[0][0]);

3418 if(0==strncmp(s, "0000..10FFFF", 12)) {

3419 /*

3420 * Ignore the line

3421 * # @missing: 0000..10FFFF; C; <code point>

3422 * because maps-to-self is already our default, and this line breaks thi s parser.

3423 */

3424 return;

3425 }

3426 c=(UChar32)strtoul(s, &end, 16);

3427 end=(char *)u_skipWhitespace(end);

3428 if(end<=fields[0][0] \|\| end!=fields[0][1]) {

3429 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]) ;

3430 *pErrorCode=U_PARSE_ERROR;

3431 return;

3432 }

3433

3434 /* get the status of this mapping */

3435 status=*u_skipWhitespace(fields[1][0]);

3436 if(status!='C' && status!='S' && status!='F' && status!='T') {

3437 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0 ][0]);

3438 *pErrorCode=U_PARSE_ERROR;

3439 return;

3440 }

3441

3442 /* get the mapping */

3443 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode) ;

3444 if(U_FAILURE(*pErrorCode)) {

3445 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);

3446 return;

3447 }

3448

3449 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */

3450 if(count==0 \|\| count>2 \|\| (count==2 && U16_IS_SINGLE(full[1]))) {

3451 simple=c;

3452 }

3453

3454 if(c!=(prev=pData->prev)) {

3455 /*

3456 * Test remaining mappings for the previous code point.

3457 * If a turkic folding was not mentioned, then it should fold the same

3458 * as the regular simple case folding.

3459 */

3460 UChar prevString[2];

3461 int32_t length;

3462

3463 length=0;

3464 U16_APPEND_UNSAFE(prevString, length, prev);

3465 testFold(prev, (~pData->which)&CF_ALL,

3466 prev, pData->prevSimple,

3467 prevString, length,

3468 pData->prevFull, pData->prevFullLength);

3469 pData->prev=pData->prevSimple=c;

3470 length=0;

3471 U16_APPEND_UNSAFE(pData->prevFull, length, c);

3472 pData->prevFullLength=length;

3473 pData->which=0;

3474 }

3475

3476 /*

3477 * Turn the status into a bit set of case foldings to test.

3478 * Remember non-Turkic case foldings as defaults for Turkic mode.

3479 */

3480 switch(status) {

3481 case 'C':

3482 which=CF_SIMPLE\|CF_FULL;

3483 pData->prevSimple=simple;

3484 u_memcpy(pData->prevFull, full, count);

3485 pData->prevFullLength=count;

3486 break;

3487 case 'S':

3488 which=CF_SIMPLE;

3489 pData->prevSimple=simple;

3490 break;

3491 case 'F':

3492 which=CF_FULL;

3493 u_memcpy(pData->prevFull, full, count);

3494 pData->prevFullLength=count;

3495 break;

3496 case 'T':

3497 which=CF_TURKIC;

3498 break;

3499 default:

3500 which=0;

3501 break; /* won't happen because of test above */

3502 }

3503

3504 testFold(c, which, simple, simple, full, count, full, count);

3505

3506 /* remember which case foldings of c have been tested */

3507 pData->which\|=which;

3508

3509 /* remove c from the set of ones not mentioned in CaseFolding.txt */

3510 uset_remove(pData->notSeen, c);

3511 }

3512

3513 static void

3514 TestCaseFolding() {

3515 CaseFoldingData data={ NULL };

3516 char *fields[3][2];

3517 UErrorCode errorCode;

3518

3519 static char lastLine= (char )"10FFFF; C; 10FFFF;";

3520

3521 errorCode=U_ZERO_ERROR;

3522 /* test BMP & plane 1 - nothing interesting above */

3523 data.notSeen=uset_open(0, 0x1ffff);

3524 data.prevFullLength=1; /* length of full case folding of U+0000 */

3525

3526 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorC ode);

3527 if(U_SUCCESS(errorCode)) {

3528 int32_t i, start, end;

3529

3530 /* add a pseudo-last line to finish testing of the actual last one */

3531 fields[0][0]=lastLine;

3532 fields[0][1]=lastLine+6;

3533 fields[1][0]=lastLine+7;

3534 fields[1][1]=lastLine+9;

3535 fields[2][0]=lastLine+10;

3536 fields[2][1]=lastLine+17;

3537 caseFoldingLineFn(&data, fields, 3, &errorCode);

3538

3539 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */

3540 for(i=0;

3541 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&

3542 U_SUCCESS(errorCode);

3543 ++i

3544 ) {

3545 do {

3546 testFoldToSelf(start, CF_ALL);

3547 } while(++start<=end);

3548 }

3549 }

3550

3551 uset_close(data.notSeen);

3552 }

OLD	NEW

« no previous file with comments | « source/test/cintltst/cucdapi.c ('k') | source/test/cintltst/currtest.c » ('j') | no next file with comments »