| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2002-2013, International Business Machines | 3 * Copyright (C) 2002-2015, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * | 6 * |
| 7 * File gendict.cpp | 7 * File gendict.cpp |
| 8 */ | 8 */ |
| 9 | 9 |
| 10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
| 11 #include "unicode/uchar.h" | 11 #include "unicode/uchar.h" |
| 12 #include "unicode/ucnv.h" | 12 #include "unicode/ucnv.h" |
| 13 #include "unicode/uniset.h" | 13 #include "unicode/uniset.h" |
| (...skipping 22 matching lines...) Expand all Loading... |
| 36 #include <stdlib.h> | 36 #include <stdlib.h> |
| 37 #include <string.h> | 37 #include <string.h> |
| 38 | 38 |
| 39 #include "putilimp.h" | 39 #include "putilimp.h" |
| 40 UDate startTime; | 40 UDate startTime; |
| 41 | 41 |
| 42 static int elapsedTime() { | 42 static int elapsedTime() { |
| 43 return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); | 43 return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); |
| 44 } | 44 } |
| 45 | 45 |
| 46 #if U_PLATFORM_IMPLEMENTS_POSIX && !U_PLATFORM_HAS_WIN32_API | |
| 47 | |
| 48 #include <signal.h> | |
| 49 #include <unistd.h> | |
| 50 | |
| 51 const char *wToolname="gendict"; | |
| 52 const char *wOutname="(some file)"; | |
| 53 | |
| 54 const int firstSeconds = 5; /* seconds between notices*/ | |
| 55 const int nextSeconds = 15; /* seconds between notices*/ | |
| 56 | |
| 57 static void alarm_fn(int /*n*/) { | |
| 58 printf("%s: still writing\t%s (%ds)\t...\n", wToolname, wOutname, elapsedTi
me()); | |
| 59 | |
| 60 signal(SIGALRM, &alarm_fn); | |
| 61 alarm(nextSeconds); // reset the alarm | |
| 62 } | |
| 63 | |
| 64 static void install_watchdog(const char *toolName, const char *outFileName) { | |
| 65 wToolname=toolName; | |
| 66 wOutname=outFileName; | |
| 67 | |
| 68 signal(SIGALRM, &alarm_fn); | |
| 69 | |
| 70 alarm(firstSeconds); // set the alarm | |
| 71 } | |
| 72 | |
| 73 #else | |
| 74 static void install_watchdog(const char*, const char*) { | |
| 75 // not implemented | |
| 76 } | |
| 77 #endif | |
| 78 | |
| 79 | |
| 80 | |
| 81 | |
| 82 U_NAMESPACE_USE | 46 U_NAMESPACE_USE |
| 83 | 47 |
| 84 static char *progName; | 48 static char *progName; |
| 85 static UOption options[]={ | 49 static UOption options[]={ |
| 86 UOPTION_HELP_H, /* 0 */ | 50 UOPTION_HELP_H, /* 0 */ |
| 87 UOPTION_HELP_QUESTION_MARK, /* 1 */ | 51 UOPTION_HELP_QUESTION_MARK, /* 1 */ |
| 88 UOPTION_VERBOSE, /* 2 */ | 52 UOPTION_VERBOSE, /* 2 */ |
| 89 UOPTION_ICUDATADIR, /* 4 */ | 53 UOPTION_ICUDATADIR, /* 4 */ |
| 90 UOPTION_COPYRIGHT, /* 5 */ | 54 UOPTION_COPYRIGHT, /* 5 */ |
| 91 { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ | 55 { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ |
| 92 { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ | 56 { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ |
| 93 { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ | 57 { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ |
| 58 UOPTION_QUIET, /* 9 */ |
| 94 }; | 59 }; |
| 95 | 60 |
| 96 enum arguments { | 61 enum arguments { |
| 97 ARG_HELP = 0, | 62 ARG_HELP = 0, |
| 98 ARG_QMARK, | 63 ARG_QMARK, |
| 99 ARG_VERBOSE, | 64 ARG_VERBOSE, |
| 100 ARG_ICUDATADIR, | 65 ARG_ICUDATADIR, |
| 101 ARG_COPYRIGHT, | 66 ARG_COPYRIGHT, |
| 102 ARG_UCHARS, | 67 ARG_UCHARS, |
| 103 ARG_BYTES, | 68 ARG_BYTES, |
| 104 ARG_TRANSFORM | 69 ARG_TRANSFORM, |
| 70 ARG_QUIET |
| 105 }; | 71 }; |
| 106 | 72 |
| 107 // prints out the standard usage method describing command line arguments, | 73 // prints out the standard usage method describing command line arguments, |
| 108 // then bails out with the desired exit code | 74 // then bails out with the desired exit code |
| 109 static void usageAndDie(UErrorCode retCode) { | 75 static void usageAndDie(UErrorCode retCode) { |
| 110 fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-optio
ns] input-dictionary-file output-file\n", progName); | 76 fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-optio
ns] input-dictionary-file output-file\n", progName); |
| 111 fprintf((U_SUCCESS(retCode) ? stdout : stderr), | 77 fprintf((U_SUCCESS(retCode) ? stdout : stderr), |
| 112 "\tRead in a word list and write out a string trie dictionary\n" | 78 "\tRead in a word list and write out a string trie dictionary\n" |
| 113 "options:\n" | 79 "options:\n" |
| 114 "\t-h or -? or --help this usage text\n" | 80 "\t-h or -? or --help this usage text\n" |
| 115 "\t-V or --version show a version message\n" | 81 "\t-V or --version show a version message\n" |
| 116 "\t-c or --copyright include a copyright notice\n" | 82 "\t-c or --copyright include a copyright notice\n" |
| 117 "\t-v or --verbose turn on verbose output\n" | 83 "\t-v or --verbose turn on verbose output\n" |
| 84 "\t-q or --quiet do not display warnings and progress\n" |
| 118 "\t-i or --icudatadir directory for locating any needed intermediate
data files,\n" // TODO: figure out if we need this option | 85 "\t-i or --icudatadir directory for locating any needed intermediate
data files,\n" // TODO: figure out if we need this option |
| 119 "\t followed by path, defaults to %s\n" | 86 "\t followed by path, defaults to %s\n" |
| 120 "\t--uchars output a UCharsTrie (mutually exclusive with -
b!)\n" | 87 "\t--uchars output a UCharsTrie (mutually exclusive with -
b!)\n" |
| 121 "\t--bytes output a BytesTrie (mutually exclusive with -u
!)\n" | 88 "\t--bytes output a BytesTrie (mutually exclusive with -u
!)\n" |
| 122 "\t--transform the kind of transform to use (eg --transform o
ffset-40A3,\n" | 89 "\t--transform the kind of transform to use (eg --transform o
ffset-40A3,\n" |
| 123 "\t which specifies an offset transform with const
ant 0x40A3)\n", | 90 "\t which specifies an offset transform with const
ant 0x40A3)\n", |
| 124 u_getDataDirectory()); | 91 u_getDataDirectory()); |
| 125 exit(retCode); | 92 exit(retCode); |
| 126 } | 93 } |
| 127 | 94 |
| (...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 284 fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); | 251 fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); |
| 285 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | 252 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); |
| 286 } | 253 } |
| 287 | 254 |
| 288 if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { | 255 if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { |
| 289 // -? or -h for help. | 256 // -? or -h for help. |
| 290 usageAndDie(U_ZERO_ERROR); | 257 usageAndDie(U_ZERO_ERROR); |
| 291 } | 258 } |
| 292 | 259 |
| 293 UBool verbose = options[ARG_VERBOSE].doesOccur; | 260 UBool verbose = options[ARG_VERBOSE].doesOccur; |
| 261 UBool quiet = options[ARG_QUIET].doesOccur; |
| 294 | 262 |
| 295 if (argc < 3) { | 263 if (argc < 3) { |
| 296 fprintf(stderr, "input and output file must both be specified.\n"); | 264 fprintf(stderr, "input and output file must both be specified.\n"); |
| 297 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | 265 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); |
| 298 } | 266 } |
| 299 const char *outFileName = argv[2]; | 267 const char *outFileName = argv[2]; |
| 300 const char *wordFileName = argv[1]; | 268 const char *wordFileName = argv[1]; |
| 301 | 269 |
| 302 startTime = uprv_getRawUTCtime(); // initialize start timer | 270 startTime = uprv_getRawUTCtime(); // initialize start timer |
| 303 // set up the watchdog | |
| 304 install_watchdog(progName, outFileName); | |
| 305 | 271 |
| 306 if (options[ARG_ICUDATADIR].doesOccur) { | 272 » if (options[ARG_ICUDATADIR].doesOccur) { |
| 307 u_setDataDirectory(options[ARG_ICUDATADIR].value); | 273 u_setDataDirectory(options[ARG_ICUDATADIR].value); |
| 308 } | 274 } |
| 309 | 275 |
| 310 const char *copyright = NULL; | 276 const char *copyright = NULL; |
| 311 if (options[ARG_COPYRIGHT].doesOccur) { | 277 if (options[ARG_COPYRIGHT].doesOccur) { |
| 312 copyright = U_COPYRIGHT_STRING; | 278 copyright = U_COPYRIGHT_STRING; |
| 313 } | 279 } |
| 314 | 280 |
| 315 if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { | 281 if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { |
| 316 fprintf(stderr, "you must specify exactly one type of trie to output!\n"
); | 282 fprintf(stderr, "you must specify exactly one type of trie to output!\n"
); |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 475 if (status.isFailure()) { | 441 if (status.isFailure()) { |
| 476 fprintf(stderr, "gendict: error \"%s\" writing the output file\n", statu
s.errorName()); | 442 fprintf(stderr, "gendict: error \"%s\" writing the output file\n", statu
s.errorName()); |
| 477 exit(status.reset()); | 443 exit(status.reset()); |
| 478 } | 444 } |
| 479 | 445 |
| 480 if (bytesWritten != (size_t)size) { | 446 if (bytesWritten != (size_t)size) { |
| 481 fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); | 447 fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); |
| 482 exit(U_INTERNAL_PROGRAM_ERROR); | 448 exit(U_INTERNAL_PROGRAM_ERROR); |
| 483 } | 449 } |
| 484 | 450 |
| 485 printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()
); | 451 if (!quiet) { printf("%s: done writing\t%s (%ds).\n", progName, outFileName,
elapsedTime()); } |
| 486 | 452 |
| 487 #ifdef TEST_GENDICT | 453 #ifdef TEST_GENDICT |
| 488 if (isBytesTrie) { | 454 if (isBytesTrie) { |
| 489 BytesTrie::Iterator it(outData, outDataSize, status); | 455 BytesTrie::Iterator it(outData, outDataSize, status); |
| 490 while (it.hasNext()) { | 456 while (it.hasNext()) { |
| 491 it.next(status); | 457 it.next(status); |
| 492 const StringPiece s = it.getString(); | 458 const StringPiece s = it.getString(); |
| 493 int32_t val = it.getValue(); | 459 int32_t val = it.getValue(); |
| 494 printf("%s -> %i\n", s.data(), val); | 460 printf("%s -> %i\n", s.data(), val); |
| 495 } | 461 } |
| 496 } else { | 462 } else { |
| 497 UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); | 463 UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); |
| 498 while (it.hasNext()) { | 464 while (it.hasNext()) { |
| 499 it.next(status); | 465 it.next(status); |
| 500 const UnicodeString s = it.getString(); | 466 const UnicodeString s = it.getString(); |
| 501 int32_t val = it.getValue(); | 467 int32_t val = it.getValue(); |
| 502 char tmp[1024]; | 468 char tmp[1024]; |
| 503 s.extract(0, s.length(), tmp, 1024); | 469 s.extract(0, s.length(), tmp, 1024); |
| 504 printf("%s -> %i\n", tmp, val); | 470 printf("%s -> %i\n", tmp, val); |
| 505 } | 471 } |
| 506 } | 472 } |
| 507 #endif | 473 #endif |
| 508 | 474 |
| 509 return 0; | 475 return 0; |
| 510 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 476 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| 511 } | 477 } |
| OLD | NEW |