source/tools/dumpce/dumpce.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/tools/dumpce/dumpce.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /********************************************************************

2 * COPYRIGHT:

3 * Copyright (C) 2001-2011 IBM, Inc. All Rights Reserved.

4 *

5 ********************************************************************/

6 /******************************************************************************* *

7 *

8 * File dumpce.cpp

9 *

10 * Modification History:

11 * Name Date Description

12 * synwee May 31 2001 Creation

13 *

14 ******************************************************************************** *

15 */

16

17 /**

18 * This program outputs the collation elements used for a requested tailoring.

19 *

20 * Usage:

21 * dumpce options... please check main function.

22 */

23 #include <unicode/utypes.h>

24 #include <unicode/ucol.h>

25 #include <unicode/uloc.h>

26 #include <unicode/ucoleitr.h>

27 #include <unicode/uchar.h>

28 #include <unicode/uscript.h>

29 #include <unicode/utf16.h>

30 #include <unicode/putil.h>

31 #include <unicode/ustring.h>

32 #include <stdio.h>

33 #include <stdlib.h>

34 #include <string.h>

35 #include <time.h>

36 #include "ucol_tok.h"

37 #include "cstring.h"

38 #include "uoptions.h"

39 #include "ucol_imp.h"

40 #include <unicode/ures.h>

41 #include <unicode/uniset.h>

42 #include <unicode/usetiter.h>

43

44 /**

45 * Command line option variables.

46 * These global variables are set according to the options specified on the

47 * command line by the user.

48 */

49 static UOption options[]={

50 /* 00 */ UOPTION_HELP_H,

51 /* 01 */ UOPTION_HELP_QUESTION_MARK,

52 /* 02 */ {"locale", NULL, NULL, NULL, 'l', UOPT_REQUIRES_ARG, 0},

53 /* 03 */ {"serialize", NULL, NULL, NULL, 'z', UOPT_NO_ARG, 0},

54 /* 04 */ UOPTION_DESTDIR,

55 /* 05 */ UOPTION_SOURCEDIR,

56 /* 06 */ {"attribute", NULL, NULL, NULL, 'a', UOPT_REQUIRES_ARG, 0},

57 /* 07 */ {"rule", NULL, NULL, NULL, 'r', UOPT_REQUIRES_ARG, 0},

58 /* 08 */ {"normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0},

59 /* 09 */ {"scripts", NULL, NULL, NULL, 't', UOPT_NO_ARG, 0},

60 /* 10 */ {"reducehan", NULL, NULL, NULL, 'e', UOPT_NO_ARG, 0},

61 /* 11 */ UOPTION_VERBOSE,

62 /* 12 */ {"wholescripts", NULL, NULL, NULL, 'W', UOPT_NO_ARG, 0}

63 };

64

65 /**

66 * Collator used in this program

67 */

68 static UCollator *COLLATOR_;

69 /**

70 * Output stream used in this program

71 */

72 static FILE *OUTPUT_;

73

74 static UColAttributeValue ATTRIBUTE_[UCOL_ATTRIBUTE_COUNT] = {

75 UCOL_DEFAULT, UCOL_DEFAULT, UCOL_DEFAULT, UCOL_DEFAULT, UCOL_DEFAULT,

76 UCOL_DEFAULT, UCOL_DEFAULT, UCOL_DEFAULT,

77 };

78

79 typedef struct {

80 int value;

81 char *name;

82 } EnumNameValuePair;

83

84 static const EnumNameValuePair ATTRIBUTE_NAME_[] = {

85 {UCOL_FRENCH_COLLATION, "UCOL_FRENCH_COLLATION"},

86 {UCOL_ALTERNATE_HANDLING, "UCOL_ALTERNATE_HANDLING"},

87 {UCOL_CASE_FIRST, "UCOL_CASE_FIRST"},

88 {UCOL_CASE_LEVEL, "UCOL_CASE_LEVEL"},

89 {UCOL_NORMALIZATION_MODE,

90 "UCOL_NORMALIZATION_MODE|UCOL_DECOMPOSITION_MODE"},

91 {UCOL_STRENGTH, "UCOL_STRENGTH"},

92 {UCOL_HIRAGANA_QUATERNARY_MODE, "UCOL_HIRAGANA_QUATERNARY_MODE"},

93 {UCOL_NUMERIC_COLLATION, "UCOL_NUMERIC_COLLATION"},

94 NULL

95 };

96

97 static const EnumNameValuePair ATTRIBUTE_VALUE_[] = {

98 {UCOL_PRIMARY, "UCOL_PRIMARY"},

99 {UCOL_SECONDARY, "UCOL_SECONDARY"},

100 {UCOL_TERTIARY, "UCOL_TERTIARY|UCOL_DEFAULT_STRENGTH"},

101 {UCOL_QUATERNARY, "UCOL_QUATERNARY"},

102 {UCOL_IDENTICAL, "UCOL_IDENTICAL"},

103 {UCOL_OFF, "UCOL_OFF"},

104 {UCOL_ON, "UCOL_ON"},

105 {UCOL_SHIFTED, "UCOL_SHIFTED"},

106 {UCOL_NON_IGNORABLE, "UCOL_NON_IGNORABLE"},

107 {UCOL_LOWER_FIRST, "UCOL_LOWER_FIRST"},

108 {UCOL_UPPER_FIRST, "UCOL_UPPER_FIRST"},

109 NULL

110 };

111

112 typedef struct {

113 UChar ch[32];

114 int count; // number of codepoint

115 UBool tailored;

116 } ScriptElement;

117

118 /**

119 * Writes the hexadecimal of a null-terminated array of codepoints into a

120 * file

121 * @param f FILE stream to write to

122 * @param c codepoints array

123 */

124 void serialize(FILE *f, const UChar *c)

125 {

126 UChar cp = *(c ++);

127

128 fprintf(f, " %04x", cp);

129

130 while (*c != 0) {

131 cp = *(c ++);

132 fprintf(f, " %04x", cp);

133 }

134 }

135

136 /**

137 * Writes the hexadecimal of a non-null-terminated array of codepoints into a

138 * file

139 * @param f FILE stream to write to

140 * @param c codepoints array

141 * @param l codepoints array length

142 */

143 void serialize(FILE *f, const UChar *c, int l)

144 {

145 int count = 1;

146 UChar cp = *(c ++);

147

148 fprintf(f, " %04x", cp);

149

150 while (count < l) {

151 cp = *(c ++);

152 fprintf(f, " %04x", cp);

153 count ++;

154 }

155 }

156

157 /**

158 * Sets the iterator to the argument string and outputs the collation elements.

159 * @param f file output stream

160 * @param iter collation element iterator

161 */

162 void serialize(FILE *f, UCollationElements *iter) {

163 const UChar *codepoint = iter->iteratordata_.string;

164 // unlikely that sortkeys will be over this size

165 uint8_t sortkey[64];

166 uint8_t *psortkey = sortkey;

167 int sortkeylength = 0;

168

169 if (iter->iteratordata_.flags & UCOL_ITER_HASLEN) {

170 serialize(f, codepoint, iter->iteratordata_.endp - codepoint);

171 sortkeylength = ucol_getSortKey(iter->iteratordata_.coll, codepoint,

172 iter->iteratordata_.endp - codepoint, sortkey, 64);

173 }

174 else {

175 serialize(f, codepoint);

176 sortkeylength = ucol_getSortKey(iter->iteratordata_.coll, codepoint,

177 -1, sortkey, 64);

178 }

179 if (options[11].doesOccur) {

180 serialize(stdout, codepoint);

181 fprintf(stdout, "\n");

182 }

183

184 fprintf(f, "; ");

185

186 UErrorCode error = U_ZERO_ERROR;

187 uint32_t ce = ucol_next(iter, &error);

188 if (U_FAILURE(error)) {

189 fprintf(f, "Error retrieving collation elements\n");

190 return;

191 }

192

193 while (TRUE) {

194 fprintf(f, "[");

195 if (UCOL_PRIMARYORDER(ce) != 0) {

196 fprintf(f, "%04x", UCOL_PRIMARYORDER(ce));

197 }

198 fprintf(f, ",");

199 if (UCOL_SECONDARYORDER(ce) != 0) {

200 fprintf(f, " %02x", UCOL_SECONDARYORDER(ce));

201 }

202 fprintf(f, ",");

203 if (UCOL_TERTIARYORDER(ce) != 0) {

204 fprintf(f, " %02x", UCOL_TERTIARYORDER(ce));

205 }

206 fprintf(f, "] ");

207

208 ce = ucol_next(iter, &error);

209 if (ce == UCOL_NULLORDER) {

210 break;

211 }

212 if (U_FAILURE(error)) {

213 fprintf(stdout, "Error retrieving collation elements");

214 return;

215 }

216 }

217

218 if (sortkeylength > 64) {

219 fprintf(f, "Sortkey exceeds pre-allocated size");

220 }

221

222 fprintf(f, "[");

223 while (TRUE) {

224 fprintf(f, "%02x", *psortkey);

225 psortkey ++;

226 if ((*psortkey) == 0) {

227 break;

228 }

229 fprintf(f, " ");

230 }

231 fprintf(f, "]\n");

232 }

233

234 /**

235 * Serializes the contraction within the given argument rule

236 * @param f file output stream

237 * @param rule rule to parse

238 * @param rlen rule length

239 * @param contractiononly flag to indicate if only contractions are to be

240 * output or all collation elements

241 * @param iter iterator to iterate over collation elements

242 */

243 void serialize(FILE *f, UChar *rule, int rlen, UBool contractiononly,

244 UCollationElements *iter) {

245 const UChar *current = NULL;

246 uint32_t strength = 0;

247 uint32_t chOffset = 0;

248 uint32_t chLen = 0;

249 uint32_t exOffset = 0;

250 uint32_t exLen = 0;

251 uint32_t prefixOffset = 0;

252 uint32_t prefixLen = 0;

253 uint8_t specs = 0;

254 UBool rstart = TRUE;

255 UColTokenParser src;

256 UColOptionSet opts;

257 UParseError parseError;

258 UErrorCode error = U_ZERO_ERROR;

259

260 src.opts = &opts;

261

262 src.source = rule;

263 src.current = rule;

264 src.end = rule + rlen;

265 src.extraCurrent = src.end;

266 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

267

268

269 while ((current = ucol_tok_parseNextToken(&src, rstart, &parseError,

270 &error)) != NULL) {

271 chOffset = src.parsedToken.charsOffset;

272 chLen = src.parsedToken.charsLen;

273 // contractions handled here

274 if (!contractiononly || chLen > 1) {

275 ucol_setText(iter, rule + chOffset, chLen, &error);

276 if (U_FAILURE(error)) {

277 fprintf(stdout, "Error setting text in iterator\n");

278 return;

279 }

280 serialize(f, iter);

281 }

282 rstart = FALSE;

283 }

284 }

285

286 /**

287 * Prints the attribute values in the argument collator into the output stream

288 * @param collator

289 */

290 void outputAttribute(UCollator *collator, UErrorCode *error)

291 {

292 UColAttribute attribute = UCOL_FRENCH_COLLATION;

293 while (attribute < UCOL_ATTRIBUTE_COUNT) {

294 int count = 0;

295 while (TRUE) {

296 // getting attribute name

297 if (ATTRIBUTE_NAME_[count].value == attribute) {

298 fprintf(OUTPUT_, "%s = ", ATTRIBUTE_NAME_[count].name);

299 break;

300 }

301 count ++;

302 }

303 count = 0;

304 int attributeval = ucol_getAttribute(collator, attribute, error);

305 if (U_FAILURE(*error)) {

306 fprintf(stdout, "Failure in reading collator attribute\n");

307 return;

308 }

309 while (TRUE) {

310 // getting attribute value

311 if (ATTRIBUTE_VALUE_[count].value == attributeval) {

312 fprintf(OUTPUT_, "%s\n", ATTRIBUTE_VALUE_[count].name);

313 break;

314 }

315 count ++;

316 }

317 attribute = (UColAttribute)(attribute + 1);

318 }

319 }

320

321 /**

322 * Prints the normalization mode in the argument collator into the output stream

323 * @param collator

324 */

325 void outputNormalization(UCollator *collator)

326 {

327 UErrorCode status = U_ZERO_ERROR;

328 int normmode = ucol_getAttribute(collator, UCOL_NORMALIZATION_MODE, &status) ;

329 int count = 0;

330 while (TRUE) {

331 // getting attribute name

332 if (ATTRIBUTE_VALUE_[count].value == normmode) {

333 break;

334 }

335 count ++;

336 }

337 fprintf(OUTPUT_, "NORMALIZATION MODE = %s\n",

338 ATTRIBUTE_VALUE_[count].name);

339 }

340

341 /**

342 * Output the collation element belonging to the locale into a file

343 * @param locale string

344 * @param tailoredonly flag to indicate if only tailored collation elements are to

345 * be output or all collation elements

346 */

347 void serialize(const char *locale, UBool tailoredonly) {

348 UErrorCode error = U_ZERO_ERROR;

349 UChar str[128];

350 int strlen = 0;

351

352 fprintf(OUTPUT_, "# This file contains the serialized collation elements\n") ;

353 fprintf(OUTPUT_, "# as of the collation version indicated below.\n");

354 fprintf(OUTPUT_, "# Data format: xxxx xxxx..; [yyyy, yy, yy] [yyyy, yy, yy] ... [yyyy, yy, yy] [zz zz..\n");

355 fprintf(OUTPUT_, "# where xxxx are codepoints in hexadecimals,\n");

356 fprintf(OUTPUT_, "# yyyyyyyy are the corresponding\n");

357 fprintf(OUTPUT_, "# collation elements in hexadecimals\n");

358 fprintf(OUTPUT_, "# and zz are the sortkey values in hexadecimals\n");

359

360 fprintf(OUTPUT_, "\n# Collator information\n");

361

362 fprintf(OUTPUT_, "\nLocale: %s\n", locale);

363 fprintf(stdout, "Locale: %s\n", locale);

364 UVersionInfo version;

365 ucol_getVersion(COLLATOR_, version);

366 fprintf(OUTPUT_, "Version number: %d.%d.%d.%d\n",

367 version[0], version[1], version[2], version[3]);

368 outputAttribute(COLLATOR_, &error);

369 outputNormalization(COLLATOR_);

370

371 UCollationElements *iter = ucol_openElements(COLLATOR_, str, strlen,

372 &error);

373 if (U_FAILURE(error)) {

374 fprintf(stdout, "Error creating iterator\n");

375 return;

376 }

377

378 if (!tailoredonly) {

379 fprintf(OUTPUT_, "\n# Range of unicode characters\n\n");

380 UChar32 codepoint = 0;

381 while (codepoint <= UCHAR_MAX_VALUE) {

382 if (u_isdefined(codepoint)) {

383 strlen = 0;

384 UTF16_APPEND_CHAR_UNSAFE(str, strlen, codepoint);

385 str[strlen] = 0;

386 ucol_setText(iter, str, strlen, &error);

387 if (U_FAILURE(error)) {

388 fprintf(stdout, "Error setting text in iterator\n");

389 return;

390 }

391 serialize(OUTPUT_, iter);

392 }

393 codepoint ++;

394 }

395 }

396

397 UChar ucarules[0x10000];

398 UChar *rules;

399 int32_t rulelength = 0;

400 rules = ucarules;

401

402 if (tailoredonly) {

403 int32_t rulelength = 0;

404 const UChar *temp = ucol_getRules(COLLATOR_, &rulelength);

405 if (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE > 0x10000) {

406 rules = (UChar *)malloc(sizeof(UChar) *

407 (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE));

408 }

409 memcpy(rules, temp, rulelength * sizeof(UChar));

410 rules[rulelength] = 0;

411 fprintf(OUTPUT_, "\n# Tailorings\n\n");

412 serialize(OUTPUT_, rules, rulelength, FALSE, iter);

413 if (rules != ucarules) {

414 free(rules);

415 }

416 }

417 else {

418 rulelength = ucol_getRulesEx(COLLATOR_, UCOL_FULL_RULES, ucarules,

419 0x10000);

420 if (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE > 0x10000) {

421 rules = (UChar *)malloc(sizeof(UChar) *

422 (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE));

423 rulelength = ucol_getRulesEx(COLLATOR_, UCOL_FULL_RULES, rules,

424 rulelength);

425 }

426 fprintf(OUTPUT_, "\n# Contractions\n\n");

427 serialize(OUTPUT_, rules, rulelength, TRUE, iter);

428 if (rules != ucarules) {

429 free(rules);

430 }

431 }

432

433 ucol_closeElements(iter);

434 }

435

436 /**

437 * Sets the collator with the attribute values

438 * @param collator

439 * @param error status

440 */

441 void setAttributes(UCollator *collator, UErrorCode *error)

442 {

443 int count = 0;

444 while (count < UCOL_ATTRIBUTE_COUNT) {

445 if (ATTRIBUTE_[count] != UCOL_DEFAULT) {

446 ucol_setAttribute(collator, (UColAttribute)count,

447 ATTRIBUTE_[count], error);

448 if (U_FAILURE(*error)) {

449 return;

450 }

451 }

452 count ++;

453 }

454 }

455

456 /**

457 * Appends directory path with an ending separator if necessary.

458 * @param path with enough space to append one separator

459 * @return new directory path length

460 */

461 int appendDirSeparator(char *dir)

462 {

463 int dirlength = strlen(dir);

464 char dirending = dir[dirlength - 1];

465 if (dirending != U_FILE_SEP_CHAR) {

466 dir[dirlength] = U_FILE_SEP_CHAR;

467 dir[dirlength + 1] = 0;

468 return dirlength + 1;

469 }

470 return dirlength;

471 }

472

473 /**

474 * Output the collation element into a file

475 */

476 void serialize() {

477 char filename[128];

478 int dirlength = 0;

479

480 if (options[4].doesOccur) {

481 strcpy(filename, options[4].value);

482 dirlength = appendDirSeparator(filename);

483 }

484

485 if (options[2].doesOccur) {

486 const char *locale = (char *)options[2].value;

487 int32_t localeindex = 0;

488

489 if (strcmp(locale, "all") == 0) {

490 if (options[4].doesOccur) {

491 strcat(filename, "UCA.txt");

492 OUTPUT_ = fopen(filename, "w");

493 if (OUTPUT_ == NULL) {

494 fprintf(stdout, "Cannot open file:%s\n", filename);

495 return;

496 }

497 }

498 fprintf(stdout, "UCA\n");

499 UErrorCode error = U_ZERO_ERROR;

500 COLLATOR_ = ucol_open("en_US", &error);

501 if (U_FAILURE(error)) {

502 fprintf(stdout, "Collator creation failed:");

503 fprintf(stdout, u_errorName(error));

504 goto CLOSEUCA;

505 return;

506 }

507 setAttributes(COLLATOR_, &error);

508 if (U_FAILURE(error)) {

509 fprintf(stdout, "Collator attribute setting failed:");

510 fprintf(stdout, u_errorName(error));

511 goto CLOSEUCA;

512 return;

513 }

514

515 serialize("UCA", FALSE);

516 CLOSEUCA :

517 if (options[4].doesOccur) {

518 filename[dirlength] = 0;

519 fclose(OUTPUT_);

520 }

521 ucol_close(COLLATOR_);

522 localeindex = ucol_countAvailable() - 1;

523 fprintf(stdout, "Number of locales: %d\n", localeindex + 1);

524 locale = ucol_getAvailable(localeindex);

525 }

526

527 while (TRUE) {

528 UErrorCode error = U_ZERO_ERROR;

529 COLLATOR_ = ucol_open(locale, &error);

530 if (U_FAILURE(error)) {

531 fprintf(stdout, "Collator creation failed:");

532 fprintf(stdout, u_errorName(error));

533 goto CLOSETAILOR;

534 return;

535 }

536 setAttributes(COLLATOR_, &error);

537 if (U_FAILURE(error)) {

538 fprintf(stdout, "Collator attribute setting failed:");

539 fprintf(stdout, u_errorName(error));

540 goto CLOSETAILOR;

541 return;

542 }

543

544 if (options[4].doesOccur) {

545 strcat(filename, locale);

546 strcat(filename, ".txt");

547 OUTPUT_ = fopen(filename, "w");

548 if (OUTPUT_ == NULL) {

549 fprintf(stdout, "Cannot open file:%s\n", filename);

550 return;

551 }

552 }

553

554 if (options[3].doesOccur) {

555 serialize(locale, TRUE);

556 }

557

558 ucol_close(COLLATOR_);

559

560 CLOSETAILOR :

561 if (options[4].doesOccur) {

562 filename[dirlength] = 0;

563 fclose(OUTPUT_);

564 }

565

566 localeindex --;

567 if (localeindex < 0) {

568 break;

569 }

570 locale = ucol_getAvailable(localeindex);

571 }

572 }

573

574 if (options[7].doesOccur) {

575 char inputfilename[128] = "";

576 // rules are to be used

577 if (options[5].doesOccur) {

578 strcpy(inputfilename, options[5].value);

579 appendDirSeparator(inputfilename);

580 }

581 strcat(inputfilename, options[7].value);

582 FILE *input = fopen(inputfilename, "r");

583 if (input == NULL) {

584 fprintf(stdout, "Cannot open file:%s\n", filename);

585 return;

586 }

587

588 char s[1024];

589 UChar rule[1024];

590 UChar *prule = rule;

591 int size = 1024;

592 // synwee TODO: make this part dynamic

593 while (fscanf(input, "%[^\n]s", s) != EOF) {

594 size -= u_unescape(s, prule, size);

595 prule = prule + u_strlen(prule);

596 }

597 fclose(input);

598

599 if (options[4].doesOccur) {

600 strcat(filename, "Rules.txt");

601 OUTPUT_ = fopen(filename, "w");

602 if (OUTPUT_ == NULL) {

603 fprintf(stdout, "Cannot open file:%s\n", filename);

604 return;

605 }

606 }

607

608 fprintf(stdout, "Rules\n");

609 UErrorCode error = U_ZERO_ERROR;

610 UParseError parseError;

611 COLLATOR_ = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,

612 UCOL_DEFAULT_STRENGTH, &parseError, &error);

613 if (U_FAILURE(error)) {

614 fprintf(stdout, "Collator creation failed:");

615 fprintf(stdout, u_errorName(error));

616 goto CLOSERULES;

617 return;

618 }

619 setAttributes(COLLATOR_, &error);

620 if (U_FAILURE(error)) {

621 fprintf(stdout, "Collator attribute setting failed:");

622 fprintf(stdout, u_errorName(error));

623 goto CLOSERULES;

624 return;

625 }

626

627 serialize("Rule-based", TRUE);

628 ucol_close(COLLATOR_);

629

630 CLOSERULES :

631 if (options[4].doesOccur) {

632 filename[dirlength] = 0;

633 fclose(OUTPUT_);

634 }

635 }

636 }

637

638 /**

639 * Parse for enum values.

640 * Note this only works for positive enum values.

641 * @param enumarray array containing names of the enum values in string and

642 * their corresponding value.

643 * declared enum value.

644 * @param str string to be parsed

645 * @return corresponding integer enum value or -1 if value is not found.

646 */

647 int parseEnums(const EnumNameValuePair enumarray[], const char *str)

648 {

649 const char *enumname = enumarray[0].name;

650 int result = atoi(str);

651 if (result == 0 && str[0] != '0') {

652 while (strcmp(enumname, str) != 0) {

653 // checking for multiple enum names sharing the same values

654 enumname = strstr(enumname, str);

655 if (enumname != NULL) {

656 int size = strchr(enumname, '|') - enumname;

657 if (size < 0) {

658 size = strlen(enumname);

659 }

660 if (size == (int)strlen(str)) {

661 return enumarray[result].value;

662 }

663 }

664 result ++;

665 if (&(enumarray[result]) == NULL) {

666 return -1;

667 }

668 enumname = enumarray[result].name;

669 }

670 }

671 return -1;

672 }

673

674 /**

675 * Parser for attribute name value pair

676 */

677 void parseAttributes() {

678 char str[32];

679 const char *pname = options[6].value;

680 const char *pend = options[6].value + strlen(options[6].value);

681 const char *pvalue;

682

683 while (pname < pend) {

684 pvalue = strchr(pname, '=');

685 if (pvalue == NULL) {

686 fprintf(stdout,

687 "No matching value found for attribute argument %s\n",

688 pname);

689 return;

690 }

691 int count = pvalue - pname;

692 strncpy(str, pname, count);

693 str[count] = 0;

694

695 int name = parseEnums(ATTRIBUTE_NAME_, str);

696 if (name == -1) {

697 fprintf(stdout, "Attribute name not found: %s\n", str);

698 return;

699 }

700

701 pvalue ++;

702 // getting corresponding enum value

703 pname = strchr(pvalue, ',');

704 if (pname == NULL) {

705 pname = pend;

706 }

707 count = pname - pvalue;

708 strncpy(str, pvalue, count);

709 str[count] = 0;

710 int value = parseEnums(ATTRIBUTE_VALUE_, str);

711 if (value == -1) {

712 fprintf(stdout, "Attribute value not found: %s\n", str);

713 return;

714 }

715 ATTRIBUTE_[name] = (UColAttributeValue)value;

716 pname ++;

717 }

718 }

719

720 /**

721 * Checks if the locale argument is a base language

722 * @param locale to be checked

723 * @return TRUE if it is a base language

724 */

725 inline UBool checkLocaleForLanguage(const char *locale)

726 {

727 return strlen(locale) <= 2;

728 }

729

730 /**

731 * Converts a UChar array into its string form "xxxx xxxx"

732 * @param ch array of UChar characters

733 * @param count number of UChar characters

734 */

735 void outputUChar(UChar ch[], int count)

736 {

737 for (int i = 0; i < count; i ++) {

738 fprintf(OUTPUT_, "%04X ", ch[i]);

739 }

740 }

741

742 /**

743 * If it is a primary difference returns -1 or 1.

744 * If it is a secondary difference returns -2 or 2.

745 * If it is a tertiary difference returns -3 or 3.

746 * If equals returns 0.

747 */

748 int compareSortKey(const void *elem1, const void *elem2)

749 {

750 // compare the 2 script element sort key

751 UChar *ch1 = ((ScriptElement *)elem1)->ch;

752 UChar *ch2 = ((ScriptElement *)elem2)->ch;

753 int size1 = ((ScriptElement *)elem1)->count;

754 int size2 = ((ScriptElement *)elem2)->count;

755 UErrorCode error = U_ZERO_ERROR;

756

757 ucol_setStrength(COLLATOR_, UCOL_PRIMARY);

758 int result = ucol_strcoll(COLLATOR_, ch1, size1, ch2, size2);

759 if (result == 0) {

760 ucol_setStrength(COLLATOR_, UCOL_SECONDARY);

761 result = ucol_strcoll(COLLATOR_, ch1, size1, ch2, size2);

762 if (result == 0) {

763 ucol_setStrength(COLLATOR_, UCOL_TERTIARY);

764 result = ucol_strcoll(COLLATOR_, ch1, size1, ch2, size2);

765 if (result < 0) {

766 return -3;

767 }

768 if (result > 0) {

769 return 3;

770 }

771 }

772 if (result < 0) {

773 return -2;

774 }

775 if (result > 0) {

776 return 2;

777 }

778 }

779 return result;

780 }

781

782 /**

783 * Output serialized script elements

784 * @param element the element to output

785 * @param compare the comparison with the previous element

786 * @param expansion flags TRUE if element has an expansion

787 */

788 void outputScriptElem(ScriptElement &element, int compare, UBool expansion)

789 {

790 switch (compare) {

791 case 0:

792 if (expansion) {

793 fprintf(OUTPUT_, "<tr><td class='eq' title='[");

794 }

795 else {

796 fprintf(OUTPUT_, "<tr><td class='q' title='[");

797 }

798 break;

799 case -1:

800 if (expansion) {

801 fprintf(OUTPUT_, "<tr><td class='ep' title='[");

802 }

803 else {

804 fprintf(OUTPUT_, "<tr><td class='p' title='[");

805 }

806 break;

807 case -2:

808 if (expansion) {

809 fprintf(OUTPUT_, "<tr><td class='es' title='[");

810 }

811 else {

812 fprintf(OUTPUT_, "<tr><td class='s' title='[");

813 }

814 break;

815 default:

816 if (expansion) {

817 fprintf(OUTPUT_, "<tr><td class='et' title='[");

818 }

819 else {

820 fprintf(OUTPUT_, "<tr><td class='t' title='[");

821 }

822 }

823

824 uint8_t sortkey[32];

825 ucol_setStrength(COLLATOR_, UCOL_TERTIARY);

826 ucol_getSortKey(COLLATOR_, element.ch, element.count, sortkey, 32);

827 int i = 0;

828 while (sortkey[i] != 0) {

829 if (sortkey[i] == 1) {

830 fprintf(OUTPUT_, " | ");

831 }

832 else {

833 fprintf(OUTPUT_, "%02x", sortkey[i]);

834 }

835

836 i ++;

837 }

838

839 fprintf(OUTPUT_, "]'>");

840

841 UErrorCode error = U_ZERO_ERROR;

842 char utf8[64];

843 UChar nfc[32];

844 int32_t length = unorm_normalize(element.ch, element.count, UNORM_NFC, 0, nfc,

845 32, &error);

846 if (U_FAILURE(error)) {

847 fprintf(stdout, "Error normalizing contractions to NFC\n");

848 }

849 u_strToUTF8(utf8, 64, &length, nfc, length, &error);

850 if (U_FAILURE(error)) {

851 fprintf(stdout, "Error converting UChar to utf8\n");

852 return;

853 }

854

855 fprintf(OUTPUT_, "%s<br>", utf8);

856 fprintf(OUTPUT_, "<tt>");

857 outputUChar(element.ch, element.count);

858

859 if (compare == 0) {

860 fprintf(OUTPUT_, "</tt></td><td> </td><td> </td><td> </td><td>Q</td><td>");

861 }

862 else if (compare == -1) {

863 fprintf(OUTPUT_, "</tt></td><td>P</td><td> </td><td> </td><td>  </td><td>");

864 }

865 else if (compare == -2) {

866 fprintf(OUTPUT_, "</tt></td><td> </td><td>S</td><td> </td><td>  </td><td>");

867 }

868 else if (compare == -3) {

869 fprintf(OUTPUT_, "</tt></td><td> </td><td> </td><td>T</td><td>  </td><td>");

870 }

871

872 i = 0;

873 while (i < element.count) {

874 char str[128];

875 UChar32 codepoint;

876 U16_NEXT(element.ch, i, element.count, codepoint);

877 int32_t temp = u_charName(codepoint, U_UNICODE_CHAR_NAME, str, 128,

878 &error);

879 if (U_FAILURE(error)) {

880 fprintf(stdout, "Error getting character name\n");

881 return;

882 }

883 if (element.tailored) {

884 fprintf(OUTPUT_, "<b>");

885 }

886 fprintf(OUTPUT_, "%s", str);

887 if (element.tailored) {

888 fprintf(OUTPUT_, " *</b>");

889 }

890 if (i < element.count) {

891 fprintf(OUTPUT_, "<br>\n");

892 }

893 }

894

895 fprintf(OUTPUT_, "</td></tr>\n");

896 }

897

898 /**

899 * Checks if codepoint belongs to scripts

900 * @param script list

901 * @param scriptcount number of scripts

902 * @param codepoint to test

903 * @return TRUE if codepoint belongs to scripts

904 */

905 UBool checkInScripts(UScriptCode script[], int scriptcount,

906 UChar32 codepoint)

907 {

908 UErrorCode error = U_ZERO_ERROR;

909 for (int i = 0; i < scriptcount; i ++) {

910 if (script[i] == USCRIPT_HAN && options[10].doesOccur) {

911 if ((codepoint >= 0x2E80 && codepoint <= 0x2EE4) ||

912 (codepoint >= 0x2A672 && codepoint <= 0x2A6D6)) {

913 // reduce han

914 return TRUE;

915 }

916 }

917 else if (uscript_getScript(codepoint, &error) == script[i]) {

918 return TRUE;

919 }

920 if (U_FAILURE(error)) {

921 fprintf(stdout, "Error checking character in scripts\n");

922 return FALSE;

923 }

924 }

925 return FALSE;

926 }

927

928 /**

929 * Checks if the set of codepoints belongs to the script

930 * @param script list

931 * @param scriptcount number of scripts

932 * @param scriptelem

933 * @return TRUE if any codepoint belongs to one of the scripts

934 */

935 inline UBool checkInScripts(UScriptCode script[], int scriptcount,

936 ScriptElement scriptelem)

937 {

938 int i = 0;

939 while (i < scriptelem.count) {

940 UChar32 codepoint;

941 U16_NEXT(scriptelem.ch, i, scriptelem.count, codepoint);

942 UErrorCode error = U_ZERO_ERROR;

943 if (checkInScripts(script, scriptcount, codepoint)) {

944 return TRUE;

945 }

946 }

947 return FALSE;

948 }

949

950 /**

951 * Gets the script elements and contractions belonging to the script

952 * @param elems output list

953 * @param locale locale

954 * @return number of script elements

955 * Add by Richard

956 */

957 int getScriptElementsFromExemplars(ScriptElement scriptelem[], const char* locale) {

958 UErrorCode error = U_ZERO_ERROR;

959 UChar32 codepoint = 0;

960

961 UResourceBundle* ures = ures_open(NULL, locale, &error);

962 if (U_FAILURE(error)) {

963 fprintf(stdout, "Can not find resource bundle for locale: %s\n", locale) ;

964 return -1;

965 }

966 int32_t length;

967 const UChar* exemplarChars = ures_getStringByKey(ures, "ExemplarCharacters", &length, &error);

968

969 if (U_FAILURE(error)) {

970 fprintf(stdout, "Can not find ExemplarCharacters in resource bundle\n");

971 return -1;

972 }

973

974 UChar* upperChars = new UChar[length * 2];

975 if (upperChars == 0) {

976 fprintf(stdout, "Memory error\n");

977 return -1;

978 }

979

980 int32_t destLength = u_strToUpper(upperChars, length * 2, exemplarChars, -1, locale, &error);

981 if (U_FAILURE(error)) {

982 fprintf(stdout, "Error when u_strToUpper() \n");

983 return -1;

984 }

985

986 UChar* pattern = new UChar[length + destLength + 10];

987 UChar left[2] = {0x005b, 0x0};

988 UChar right[2] = {0x005d, 0x0};

989 pattern = u_strcpy(pattern, left);

990 pattern = u_strcat(pattern, exemplarChars);

991 pattern = u_strcat(pattern, upperChars);

992 pattern = u_strcat(pattern, right);

993

994 UnicodeSet * uniset = new UnicodeSet(UnicodeString(pattern), error);

995 if (U_FAILURE(error)) {

996 fprintf(stdout, "Can not open USet \n");

997 return -1;

998 }

999

1000 UnicodeSetIterator* usetiter = new UnicodeSetIterator(*uniset);

1001

1002 int32_t count = 0;

1003

1004 while (usetiter -> next()) {

1005 if (usetiter -> isString()) {

1006 UnicodeString strItem = usetiter -> getString();

1007

1008 scriptelem[count].count = 0;

1009 for (int i = 0; i < strItem.length(); i++) {

1010 codepoint = strItem.char32At(i);

1011 UTF16_APPEND_CHAR_UNSAFE(scriptelem[count].ch, scriptelem[count].count, codepoint);

1012 scriptelem[count].tailored = FALSE;

1013 }

1014 } else {

1015 codepoint = usetiter -> getCodepoint();

1016 scriptelem[count].count = 0;

1017 UTF16_APPEND_CHAR_UNSAFE(scriptelem[count].ch, scriptelem[count].count, codepoint);

1018 scriptelem[count].tailored = FALSE;

1019 }

1020

1021 count++;

1022 }

1023 delete []pattern;

1024

1025 return count;

1026 }

1027

1028 /**

1029 * Gets the script elements and contractions belonging to the script

1030 * @param script list

1031 * @param scriptcount number of scripts

1032 * @param elems output list

1033 * @return number of script elements

1034 */

1035 int getScriptElements(UScriptCode script[], int scriptcount,

1036 ScriptElement scriptelem[])

1037 {

1038 UErrorCode error = U_ZERO_ERROR;

1039 UChar32 codepoint = 0;

1040 int count = 0;

1041 while (codepoint <= UCHAR_MAX_VALUE) {

1042 if (checkInScripts(script, scriptcount, codepoint)) {

1043 scriptelem[count].count = 0;

1044 UTF16_APPEND_CHAR_UNSAFE(scriptelem[count].ch,

1045 scriptelem[count].count, codepoint);

1046 scriptelem[count].tailored = FALSE;

1047 count ++;

1048 }

1049 if (U_FAILURE(error)) {

1050 fprintf(stdout, "Error determining codepoint in script\n");

1051 return -1;

1052 }

1053 codepoint ++;

1054 }

1055

1056 const UChar *current = NULL;

1057 uint32_t strength = 0;

1058 uint32_t chOffset = 0;

1059 uint32_t chLen = 0;

1060 uint32_t exOffset = 0;

1061 uint32_t exLen = 0;

1062 uint32_t prefixOffset = 0;

1063 uint32_t prefixLen = 0;

1064 uint8_t specs = 0;

1065 UBool rstart = TRUE;

1066 UColTokenParser src;

1067 UColOptionSet opts;

1068 UParseError parseError;

1069

1070 int32_t rulelength = ucol_getRulesEx(COLLATOR_, UCOL_FULL_RULES, NULL, 0);

1071 src.source = (UChar *)malloc(sizeof(UChar) *

1072 (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE));

1073 rulelength = ucol_getRulesEx(COLLATOR_, UCOL_FULL_RULES, src.source,

1074 rulelength);

1075 src.current = src.source;

1076 src.end = src.source + rulelength;

1077 src.extraCurrent = src.end;

1078 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

1079 src.opts = &opts;

1080

1081 /*

1082 ucol_tok_parseNextToken(&src, &strength, &chOffset,

1083 &chLen, &exOffset, &exLen,

1084 &prefixOffset, &prefixLen,

1085 &specs, rstart, &parseError,

1086 &error)

1087 */

1088 while ((current = ucol_tok_parseNextToken(&src, rstart, &parseError,

1089 &error)) != NULL) {

1090 // contractions handled here

1091 if (chLen > 1) {

1092 u_strncpy(scriptelem[count].ch, src.source + chOffset, chLen);

1093 scriptelem[count].count = chLen;

1094 if (checkInScripts(script, scriptcount, scriptelem[count])) {

1095 scriptelem[count].tailored = FALSE;

1096 count ++;

1097 }

1098 }

1099 rstart = FALSE;

1100 }

1101 if (U_FAILURE(error)) {

1102 fprintf(stdout, "Error parsing rules: %s\n", u_errorName(error));

1103 }

1104 // rule might have been reallocated, so delete this instead

1105 free(src.source);

1106 return count;

1107 }

1108

1109 int compareCodepoints(const void elem1, const void elem2)

1110 {

1111 UChar ch1 = ((ScriptElement )elem1)->ch; // key

1112 UChar ch2 = ((ScriptElement )elem2)->ch;

1113 ch1[((ScriptElement *)elem1)->count] = 0;

1114 ch2[((ScriptElement *)elem2)->count] = 0;

1115

1116 // compare the 2 codepoints

1117 return u_strcmp(ch1, ch2);

1118 }

1119

1120 UBool hasSubNFD(ScriptElement &se, ScriptElement &key)

1121 {

1122 UChar *ch1 = se.ch;

1123 UChar *ch2 = key.ch; // key

1124 ch1[se.count] = 0;

1125 ch2[key.count] = 0;

1126

1127 // compare the 2 codepoints

1128 if (u_strstr(ch1, ch2) != NULL) {

1129 return TRUE;

1130 }

1131

1132 // check the decomposition

1133 UChar norm[32];

1134 UErrorCode error = U_ZERO_ERROR;

1135 int size = unorm_normalize(ch1, se.count, UNORM_NFD, 0, norm, 32,

1136 &error);

1137 if (U_FAILURE(error)) {

1138 fprintf(stdout, "Error normalizing\n");

1139 }

1140 if (u_strstr(norm, ch2) != NULL) {

1141 return TRUE;

1142 }

1143 return FALSE;

1144 }

1145

1146 /**

1147 * Marks tailored elements

1148 * @param script list

1149 * @param scriptcount number of scripts

1150 * @param scriptelem script element list

1151 * @param scriptelemlength size of the script element list

1152 */

1153 void markTailored(UScriptCode script[], int scriptcount,

1154 ScriptElement scriptelem[], int scriptelemlength)

1155 {

1156 int32_t rulelength;

1157 const UChar *rule = ucol_getRules(COLLATOR_, &rulelength);

1158

1159 const UChar *current = NULL;

1160 uint32_t strength = 0;

1161 uint32_t chOffset = 0;

1162 uint32_t chLen = 0;

1163 uint32_t exOffset = 0;

1164 uint32_t exLen = 0;

1165 uint32_t prefixOffset = 0;

1166 uint32_t prefixLen = 0;

1167 uint8_t specs = 0;

1168 UBool rstart = TRUE;

1169 UColTokenParser src;

1170 UColOptionSet opts;

1171 UParseError parseError;

1172

1173 src.opts = &opts;

1174 src.source = (UChar *)malloc(

1175 (rulelength + UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));

1176 memcpy(src.source, rule, rulelength * sizeof(UChar));

1177 src.current = src.source;

1178 src.end = (UChar *)src.source + rulelength;

1179 src.extraCurrent = src.end;

1180 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

1181

1182 UErrorCode error = U_ZERO_ERROR;

1183

1184 while ((current = ucol_tok_parseNextToken(&src, rstart, &parseError,

1185 &error)) != NULL) {

1186 if (chLen >= 1 && strength != UCOL_TOK_RESET) {

1187 // skipping the reset characters and non useful stuff.

1188 ScriptElement se;

1189 u_strncpy(se.ch, src.source + chOffset, chLen);

1190 se.count = chLen;

1191

1192 if (checkInScripts(script, scriptcount, se)) {

1193 /*

1194 ScriptElement tse = (ScriptElement )bsearch(&se, scriptelem,

1195 scriptelemlength,

1196 sizeof(ScriptElement),

1197 compareCodepoints);

1198 */

1199 for (int i = 0; i < scriptelemlength; i ++) {

1200 if (!scriptelem[i].tailored &&

1201 hasSubNFD(scriptelem[i], se)) {

1202 scriptelem[i].tailored = TRUE;

1203 }

1204 }

1205 }

1206 }

1207 rstart = FALSE;

1208 }

1209 free(src.source);

1210 if (U_FAILURE(error)) {

1211 fprintf(stdout, "Error parsing rules\n");

1212 }

1213 }

1214

1215 /**

1216 * Checks if the collation iterator has more than 1 collation element

1217 * @parem coleiter collation element iterator

1218 * @return TRUE if collation iterator has more than 1 collation element

1219 */

1220 UBool hasExpansions(UCollationElements *coleiter)

1221 {

1222 UErrorCode error = U_ZERO_ERROR;

1223 int32_t ce = ucol_next(coleiter, &error);

1224 int count = 0;

1225

1226 if (U_FAILURE(error)) {

1227 fprintf(stdout, "Error getting next collation element\n");

1228 }

1229 while (ce != UCOL_NULLORDER) {

1230 if ((UCOL_PRIMARYORDER(ce) != 0) && !isContinuation(ce)) {

1231 count ++;

1232 if (count == 2) {

1233 return TRUE;

1234 }

1235 }

1236 ce = ucol_next(coleiter, &error);

1237 if (U_FAILURE(error)) {

1238 fprintf(stdout, "Error getting next collation element\n");

1239 }

1240 }

1241 return FALSE;

1242 }

1243

1244 /**

1245 * Prints the footer for index.html

1246 * @param file output file

1247 */

1248 void outputHTMLFooter()

1249 {

1250 fprintf(OUTPUT_, "</table>\n");

1251 fprintf(OUTPUT_, "</body>\n");

1252 fprintf(OUTPUT_, "</html>\n");

1253 }

1254

1255 /**

1256 * Serialize the codepoints from start to end into an html file.

1257 * Arranging them into ascending collation order.

1258 * @param script code list

1259 * @param scriptcount number of scripts

1260 */

1261 //void serializeScripts(UScriptCode script[], int scriptcount)

1262 //Richard

1263 void serializeScripts(UScriptCode script[], int scriptcount, const char* locale = NULL)

1264 {

1265 UErrorCode error = U_ZERO_ERROR;

1266

1267 ScriptElement *scriptelem =

1268 (ScriptElement )malloc(sizeof(ScriptElement) 0x20000);

1269 if (scriptelem == NULL) {

1270 fprintf(stdout, "Memory error\n");

1271 return;

1272 }

1273 int count = 0;

1274 if(locale) {

1275 count = getScriptElementsFromExemplars(scriptelem, locale);

1276 } else {

1277 count = getScriptElements(script, scriptcount, scriptelem);

1278 }

1279

1280 // Sort script elements using Quicksort algorithm:

1281 qsort(scriptelem, count, sizeof(ScriptElement), compareCodepoints);

1282 markTailored(script, scriptcount, scriptelem, count);

1283 // Sort script elements using Quicksort algorithm:

1284 qsort(scriptelem, count, sizeof(ScriptElement), compareSortKey);

1285

1286 UCollationElements* coleiter = ucol_openElements(COLLATOR_,

1287 scriptelem[0].ch,

1288 scriptelem[0].count,

1289 &error);

1290 if (U_FAILURE(error)) {

1291 fprintf(stdout, "Error creating collation element iterator\n");

1292 return;

1293 }

1294

1295 outputScriptElem(scriptelem[0], -1, hasExpansions(coleiter));

1296 for (int i = 0; i < count - 1; i ++) {

1297 ucol_setText(coleiter, scriptelem[i + 1].ch, scriptelem[i + 1].count,

1298 &error);

1299 if (U_FAILURE(error)) {

1300 fprintf(stdout, "Error setting text in collation element iterator\n" );

1301 return;

1302 }

1303 outputScriptElem(scriptelem[i + 1],

1304 compareSortKey(scriptelem + i, scriptelem + i + 1),

1305 hasExpansions(coleiter));

1306 }

1307 free(scriptelem);

1308 outputHTMLFooter();

1309 }

1310

1311 /**

1312 * Prints the header for the html

1313 * @param locale name

1314 * @param script

1315 * @param scriptcount number of scripts

1316 */

1317 void outputHTMLHeader(const char *locale, UScriptCode script[],

1318 int scriptcount)

1319 {

1320 fprintf(OUTPUT_, "<html>\n");

1321 fprintf(OUTPUT_, "<head>\n");

1322 fprintf(OUTPUT_, "<meta http-equiv=\"Content-Type\" content=\"text/html; cha rset=utf-8\">\n");

1323 fprintf(OUTPUT_, "<meta http-equiv=\"Content-Language\" content=\"en-us\">\n ");

1324 fprintf(OUTPUT_, "<link rel=\"stylesheet\" href=\"charts.css\" type=\"text/c ss\">\n");

1325 fprintf(OUTPUT_, "<title>ICU Collation charts</title>\n");

1326 fprintf(OUTPUT_, "<base target=\"main\">\n");

1327 fprintf(OUTPUT_, "</head>\n");

1328

1329 fprintf(OUTPUT_, "<body bgcolor=#FFFFFF>\n");

1330 fprintf(OUTPUT_, "<!--\n");

1331 fprintf(OUTPUT_, "This file contains sorted characters in ascending order ac cording to the locale stated\n");

1332 fprintf(OUTPUT_, "If the character is in red, it is tailored in the collatio n rules.\n");

1333 fprintf(OUTPUT_, "Background colours have certain meanings:\n");

1334 fprintf(OUTPUT_, "White - equals the previous character\n");

1335 fprintf(OUTPUT_, "dark blue - primary greater than the previous character\n" );

1336 fprintf(OUTPUT_, "blue - secondary greater than the previous character\n");

1337 fprintf(OUTPUT_, "light blue - tertiary greater than the previous character\ n");

1338 fprintf(OUTPUT_, "--!>\n");

1339

1340 fprintf(OUTPUT_, "<table border=0>\n");

1341 UChar displayname[64];

1342 UErrorCode error = U_ZERO_ERROR;

1343 int32_t size = uloc_getDisplayName(locale, "en_US", displayname, 64, &error) ;

1344 char utf8displayname[128];

1345 if (U_FAILURE(error)) {

1346 utf8displayname[0] = 0;

1347 }

1348 else {

1349 int32_t utf8size = 0;

1350 u_strToUTF8(utf8displayname, 128, &utf8size, displayname, size, &error);

1351 }

1352

1353 fprintf(OUTPUT_, "<tr><th>Locale</th><td class='noborder'>%s</td></tr>\n", u tf8displayname);

1354 fprintf(OUTPUT_, "<tr><th>Script(s)</th>");

1355 fprintf(OUTPUT_, "<td class='noborder'>");

1356 for (int i = 0; i < scriptcount; i ++) {

1357 fprintf(OUTPUT_, "%s", uscript_getName(script[i]));

1358 if (i + 1 != scriptcount) {

1359 fprintf(OUTPUT_, ", ");

1360 }

1361 }

1362 fprintf(OUTPUT_, "</td></tr>\n");

1363

1364 fprintf(OUTPUT_, "<tr><th>Rules</th><td class='noborder'><a href=\"http://de v.icu-project.org/cgi-bin/viewcvs.cgi/checkout/icu/source/data/coll/%s.txt\">% s.txt</a></td></tr>\n", locale, locale);

1365

1366 UVersionInfo version;

1367 ucol_getVersion(COLLATOR_, version);

1368 fprintf(OUTPUT_, "<tr><th>Collator version</th><td class='noborder'>%d.%d.%d .%d</td></tr>\n",

1369 version[0], version[1], version[2], version[3]);

1370

1371 UColAttribute attr = UCOL_FRENCH_COLLATION;

1372 while (attr < UCOL_ATTRIBUTE_COUNT) {

1373 UColAttributeValue value = ucol_getAttribute(COLLATOR_, attr, &error);

1374 if (U_FAILURE(error)) {

1375 fprintf(stdout, "Error getting attribute\n");

1376 return;

1377 }

1378 if (value != UCOL_DEFAULT) {

1379 if (attr == UCOL_FRENCH_COLLATION && value != UCOL_OFF) {

1380 fprintf(OUTPUT_, "<tr><th>French Collation</th><td class='nobord er'>on, code %d</td></tr>\n", value);

1381 }

1382 if (attr == UCOL_ALTERNATE_HANDLING && value != UCOL_NON_IGNORABLE) {

1383 fprintf(OUTPUT_, "<tr><th>Alternate Handling</th><td class='nobo rder'>shifted, code%d</td></tr>\n", value);

1384 }

1385 if (attr == UCOL_CASE_FIRST && value != UCOL_OFF) {

1386 fprintf(OUTPUT_, "<tr><th>Case First</th><td class='noborder'>on , code %d</td></tr>\n", value);

1387 }

1388 if (attr == UCOL_CASE_LEVEL && value != UCOL_OFF) {

1389 fprintf(OUTPUT_, "<tr><th>Case Level</th><td class='noborder'>on , code %d</td></tr>\n", value);

1390 }

1391 if (attr == UCOL_NORMALIZATION_MODE && value != UCOL_OFF) {

1392 fprintf(OUTPUT_, "<tr><th>Normalization</th><td class='noborder' >on, code %d</td></tr>\n", value);

1393 }

1394 if (attr == UCOL_STRENGTH && value != UCOL_TERTIARY) {

1395 fprintf(OUTPUT_, "<tr><th>Strength</th><td class='noborder'>code %d</td></tr>\n", value);

1396 }

1397 if (attr == UCOL_HIRAGANA_QUATERNARY_MODE && value != UCOL_OFF) {

1398 fprintf(OUTPUT_, "<tr><th>Hiragana Quaternary</th><td class='nob order'>on, code %d</td></tr>\n", value);

1399 }

1400 }

1401 attr = (UColAttribute)(attr + 1);

1402 }

1403

1404 // Get UNIX-style time and display as number and string.

1405 time_t ltime;

1406 time( &ltime );

1407 fprintf(OUTPUT_, "<tr><th>Date Generated</th><td class='noborder'>%s</td></t r>", ctime(&ltime));

1408

1409 fprintf(OUTPUT_, "</table>\n");

1410

1411 fprintf(OUTPUT_, "<p><a href=help.html>How to read the table</a><br>\n");

1412 fprintf(OUTPUT_, "<a href=http://www.jtcsv.com/cgi-bin/icu-bugs/ target=new> Submit a bug</a></p>\n");

1413 fprintf(OUTPUT_, "\n<table>\n");

1414 fprintf(OUTPUT_, "\n<tr><th>Codepoint</th><th>P</th><th>S</th><th>T</th><th> Q</th><th>Name</th></tr>\n");

1415 }

1416

1417 /**

1418 * Prints the header for index.html

1419 * @param file output file

1420 */

1421 void outputListHTMLHeader(FILE *file)

1422 {

1423 fprintf(file, "<html>\n");

1424 fprintf(file, "<head>\n");

1425 fprintf(file, "<meta http-equiv=\"Content-Type\" content=\"text/html; charse t=utf-8\">\n");

1426 fprintf(file, "<meta http-equiv=\"Content-Language\" content=\"en-us\">\n");

1427 fprintf(file, "<title>ICU Collation Charts</title>\n");

1428 fprintf(file, "<base target=\"main\">\n");

1429 fprintf(file, "</head>\n");

1430 fprintf(file, "<body bgcolor=#FFFFFF>\n");

1431 fprintf(file, "<h2 align=center>ICU Collation Charts</h2>\n");

1432 fprintf(file, "<p align=center>\n");

1433 fprintf(file, "<a href=http://www.unicode.org/charts/collation/ target=new>U CA Charts</a><br>");

1434 }

1435

/**
 * Prints the footer for index.html: closes the paragraph, the body and the
 * document.
 * @param file output file
 */
void outputListHTMLFooter(FILE *file)
{
    static const char *const footerLines[] = {
        "</p>\n",
        "</body>\n",
        "</html>\n"
    };
    const size_t lineCount = sizeof(footerLines) / sizeof(footerLines[0]);

    for (size_t i = 0; i < lineCount; i ++) {
        fputs(footerLines[i], file);
    }
}

1447

1448 /**

1449 * Gets all scripts and serialize their codepoints into an html file.

1450 */

1451 void serializeScripts() {

1452 char filename[128];

1453 int dirlength = 0;

1454

1455 if (options[4].doesOccur) {

1456 strcpy(filename, options[4].value);

1457 dirlength = appendDirSeparator(filename);

1458 } else {

1459 filename[0] = 0;

1460 }

1461

1462 const char *locale;

1463 int32_t localelist = 0;

1464 int32_t localesize;

1465

1466 localesize = ucol_countAvailable();

1467 locale = ucol_getAvailable(localelist);

1468

1469 strcat(filename, "list.html");

1470 FILE *list = fopen(filename, "w");

1471 filename[dirlength] = 0;

1472 if (list == NULL) {

1473 fprintf(stdout, "Cannot open file: %s\n", filename);

1474 return;

1475 }

1476

1477 outputListHTMLHeader(list);

1478 fprintf(list, "<blockquote>\n");

1479 while (TRUE) {

1480 UErrorCode error = U_ZERO_ERROR;

1481 COLLATOR_ = ucol_open(locale, &error);

1482 if (U_FAILURE(error)) {

1483 fprintf(stdout, "Collator creation failed:");

1484 fprintf(stdout, u_errorName(error));

1485 break;

1486 }

1487 if ((error != U_USING_FALLBACK_WARNING && // not tailored

1488 error != U_USING_DEFAULT_WARNING) \|\|

1489 checkLocaleForLanguage(locale)) {

1490 fprintf(list, "<a href=%s.html>%s</a> ", locale, locale);

1491 setAttributes(COLLATOR_, &error);

1492 if (U_FAILURE(error)) {

1493 fprintf(stdout, "Collator attribute setting failed:");

1494 fprintf(stdout, u_errorName(error));

1495 break;

1496 }

1497

1498 UScriptCode scriptcode[32];

1499 uint32_t scriptcount = uscript_getCode(locale, scriptcode, 32,

1500 &error);

1501 if (U_FAILURE(error)) {

1502 fprintf(stdout, "Error getting lcale scripts\n");

1503 break;

1504 }

1505

1506 strcat(filename, locale);

1507 strcat(filename, ".html");

1508 OUTPUT_ = fopen(filename, "w");

1509 if (OUTPUT_ == NULL) {

1510 fprintf(stdout, "Cannot open file:%s\n", filename);

1511 break;

1512 }

1513 outputHTMLHeader(locale, scriptcode, scriptcount);

1514 fprintf(stdout, "%s\n", locale);

1515

1516 if(options[12].doesOccur) {

1517 // use whole scripts

1518 serializeScripts(scriptcode, scriptcount);

1519 } else {

1520 // use exemplar chars

1521 serializeScripts(scriptcode, scriptcount, locale);

1522 }

1523 fclose(OUTPUT_);

1524 }

1525 ucol_close(COLLATOR_);

1526

1527 filename[dirlength] = 0;

1528 localelist ++;

1529 if (localelist == localesize) {

1530 break;

1531 }

1532 locale = ucol_getAvailable(localelist);

1533 }

1534 fprintf(list, "<br><a href=help.html>help</a><br>");

1535 fprintf(list, "</blockquote>\n");

1536 outputListHTMLFooter(list);

1537 fclose(list);

1538 }

1539

1540 /**

1541 * Main -- process command line, read in and pre-process the test file,

1542 * call other functions to do the actual tests.

1543 */

1544 int main(int argc, char *argv[]) {

1545

1546 argc = u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]),

1547 options);

1548

1549 // error handling, printing usage message

1550 if (argc < 0) {

1551 fprintf(stdout, "error in command line argument: ");

1552 fprintf(stdout, argv[-argc]);

1553 fprintf(stdout, "\n");

1554 }

1555 if (argc < 0 \|\| options[0].doesOccur \|\| options[1].doesOccur) {

1556 fprintf(stdout, "Usage: dumpce options...\n"

1557 "--help\n"

1558 " Display this message.\n"

1559 "--locale name\|all\n"

1560 " ICU locale to use. Default is en_US\n"

1561 "--serialize\n"

1562 " Serializes the collation elements in -locale or all locales available and outputs them into --outputdir/locale_ce.txt\n"

1563 "--destdir dir_name\n"

1564 " Path for outputing the serialized collation element s. Defaults to stdout if no defined\n"

1565 "--sourcedir dir_name\n"

1566 " Path for the input rule file for collation\n"

1567 "--attribute name=value,name=value...\n"

1568 " Pairs of attribute names and values for setting\n"

1569 "--rule filename\n"

1570 " Name of file containing the collation rules.\n"

1571 "--normalizaton mode\n"

1572 " UNormalizationMode mode to be used.\n"

1573 "--scripts\n"

1574 " Codepoints from all scripts are sorted and serializ ed.\n"

1575 "--reducehan\n"

1576 " Only 200 Han script characters will be displayed wi th the use of --scripts.\n"

1577 "--wholescripts\n"

1578 " Show collation order for whole scripts instead of j ust for exemplar characters of a locale\n\n");

1579

1580 fprintf(stdout, "Example to generate *.txt files : dumpce --serialize -- locale af --destdir /temp --attribute UCOL_STRENGTH=UCOL_DEFAULT_STRENGTH,4=17\n \n");

1581 fprintf(stdout, "Example to generate *.html files for oss web display: d umpce --scripts --destdir /temp --reducehan\n");

1582 return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;

1583 }

1584

1585 OUTPUT_ = stdout;

1586 if (options[6].doesOccur) {

1587 fprintf(stdout, "attributes %s\n", options[6].value);

1588 parseAttributes();

1589 }

1590 if (options[3].doesOccur) {

1591 serialize();

1592 }

1593 if (options[9].doesOccur) {

1594 serializeScripts();

1595 }

1596 return 0;

1597 }

OLD	NEW

« no previous file with comments | « source/tools/dumpce/Makefile.in ('k') | source/tools/dumpce/dumpce.dsp » ('j') | no next file with comments »