OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2002-2013, International Business Machines | 3 * Copyright (C) 2002-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * | 6 * |
7 * File gendict.cpp | 7 * File gendict.cpp |
8 */ | 8 */ |
9 | 9 |
10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
11 #include "unicode/uchar.h" | 11 #include "unicode/uchar.h" |
12 #include "unicode/ucnv.h" | 12 #include "unicode/ucnv.h" |
13 #include "unicode/uniset.h" | 13 #include "unicode/uniset.h" |
(...skipping 22 matching lines...) Expand all Loading... |
36 #include <stdlib.h> | 36 #include <stdlib.h> |
37 #include <string.h> | 37 #include <string.h> |
38 | 38 |
39 #include "putilimp.h" | 39 #include "putilimp.h" |
40 UDate startTime; | 40 UDate startTime; |
41 | 41 |
42 static int elapsedTime() { | 42 static int elapsedTime() { |
43 return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); | 43 return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); |
44 } | 44 } |
45 | 45 |
46 #if U_PLATFORM_IMPLEMENTS_POSIX && !U_PLATFORM_HAS_WIN32_API | |
47 | |
48 #include <signal.h> | |
49 #include <unistd.h> | |
50 | |
51 const char *wToolname="gendict"; | |
52 const char *wOutname="(some file)"; | |
53 | |
54 const int firstSeconds = 5; /* seconds between notices*/ | |
55 const int nextSeconds = 15; /* seconds between notices*/ | |
56 | |
57 static void alarm_fn(int /*n*/) { | |
58 printf("%s: still writing\t%s (%ds)\t...\n", wToolname, wOutname, elapsedTi
me()); | |
59 | |
60 signal(SIGALRM, &alarm_fn); | |
61 alarm(nextSeconds); // reset the alarm | |
62 } | |
63 | |
64 static void install_watchdog(const char *toolName, const char *outFileName) { | |
65 wToolname=toolName; | |
66 wOutname=outFileName; | |
67 | |
68 signal(SIGALRM, &alarm_fn); | |
69 | |
70 alarm(firstSeconds); // set the alarm | |
71 } | |
72 | |
73 #else | |
74 static void install_watchdog(const char*, const char*) { | |
75 // not implemented | |
76 } | |
77 #endif | |
78 | |
79 | |
80 | |
81 | |
82 U_NAMESPACE_USE | 46 U_NAMESPACE_USE |
83 | 47 |
84 static char *progName; | 48 static char *progName; |
85 static UOption options[]={ | 49 static UOption options[]={ |
86 UOPTION_HELP_H, /* 0 */ | 50 UOPTION_HELP_H, /* 0 */ |
87 UOPTION_HELP_QUESTION_MARK, /* 1 */ | 51 UOPTION_HELP_QUESTION_MARK, /* 1 */ |
88 UOPTION_VERBOSE, /* 2 */ | 52 UOPTION_VERBOSE, /* 2 */ |
89 UOPTION_ICUDATADIR, /* 4 */ | 53 UOPTION_ICUDATADIR, /* 4 */ |
90 UOPTION_COPYRIGHT, /* 5 */ | 54 UOPTION_COPYRIGHT, /* 5 */ |
91 { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ | 55 { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ |
92 { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ | 56 { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ |
93 { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ | 57 { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ |
| 58 UOPTION_QUIET, /* 9 */ |
94 }; | 59 }; |
95 | 60 |
96 enum arguments { | 61 enum arguments { |
97 ARG_HELP = 0, | 62 ARG_HELP = 0, |
98 ARG_QMARK, | 63 ARG_QMARK, |
99 ARG_VERBOSE, | 64 ARG_VERBOSE, |
100 ARG_ICUDATADIR, | 65 ARG_ICUDATADIR, |
101 ARG_COPYRIGHT, | 66 ARG_COPYRIGHT, |
102 ARG_UCHARS, | 67 ARG_UCHARS, |
103 ARG_BYTES, | 68 ARG_BYTES, |
104 ARG_TRANSFORM | 69 ARG_TRANSFORM, |
| 70 ARG_QUIET |
105 }; | 71 }; |
106 | 72 |
107 // prints out the standard usage method describing command line arguments, | 73 // prints out the standard usage method describing command line arguments, |
108 // then bails out with the desired exit code | 74 // then bails out with the desired exit code |
109 static void usageAndDie(UErrorCode retCode) { | 75 static void usageAndDie(UErrorCode retCode) { |
110 fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-optio
ns] input-dictionary-file output-file\n", progName); | 76 fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-optio
ns] input-dictionary-file output-file\n", progName); |
111 fprintf((U_SUCCESS(retCode) ? stdout : stderr), | 77 fprintf((U_SUCCESS(retCode) ? stdout : stderr), |
112 "\tRead in a word list and write out a string trie dictionary\n" | 78 "\tRead in a word list and write out a string trie dictionary\n" |
113 "options:\n" | 79 "options:\n" |
114 "\t-h or -? or --help this usage text\n" | 80 "\t-h or -? or --help this usage text\n" |
115 "\t-V or --version show a version message\n" | 81 "\t-V or --version show a version message\n" |
116 "\t-c or --copyright include a copyright notice\n" | 82 "\t-c or --copyright include a copyright notice\n" |
117 "\t-v or --verbose turn on verbose output\n" | 83 "\t-v or --verbose turn on verbose output\n" |
| 84 "\t-q or --quiet do not display warnings and progress\n" |
118 "\t-i or --icudatadir directory for locating any needed intermediate
data files,\n" // TODO: figure out if we need this option | 85 "\t-i or --icudatadir directory for locating any needed intermediate
data files,\n" // TODO: figure out if we need this option |
119 "\t followed by path, defaults to %s\n" | 86 "\t followed by path, defaults to %s\n" |
120 "\t--uchars output a UCharsTrie (mutually exclusive with -
b!)\n" | 87 "\t--uchars output a UCharsTrie (mutually exclusive with -
b!)\n" |
121 "\t--bytes output a BytesTrie (mutually exclusive with -u
!)\n" | 88 "\t--bytes output a BytesTrie (mutually exclusive with -u
!)\n" |
122 "\t--transform the kind of transform to use (eg --transform o
ffset-40A3,\n" | 89 "\t--transform the kind of transform to use (eg --transform o
ffset-40A3,\n" |
123 "\t which specifies an offset transform with const
ant 0x40A3)\n", | 90 "\t which specifies an offset transform with const
ant 0x40A3)\n", |
124 u_getDataDirectory()); | 91 u_getDataDirectory()); |
125 exit(retCode); | 92 exit(retCode); |
126 } | 93 } |
127 | 94 |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
284 fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); | 251 fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); |
285 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | 252 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); |
286 } | 253 } |
287 | 254 |
288 if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { | 255 if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { |
289 // -? or -h for help. | 256 // -? or -h for help. |
290 usageAndDie(U_ZERO_ERROR); | 257 usageAndDie(U_ZERO_ERROR); |
291 } | 258 } |
292 | 259 |
293 UBool verbose = options[ARG_VERBOSE].doesOccur; | 260 UBool verbose = options[ARG_VERBOSE].doesOccur; |
| 261 UBool quiet = options[ARG_QUIET].doesOccur; |
294 | 262 |
295 if (argc < 3) { | 263 if (argc < 3) { |
296 fprintf(stderr, "input and output file must both be specified.\n"); | 264 fprintf(stderr, "input and output file must both be specified.\n"); |
297 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | 265 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); |
298 } | 266 } |
299 const char *outFileName = argv[2]; | 267 const char *outFileName = argv[2]; |
300 const char *wordFileName = argv[1]; | 268 const char *wordFileName = argv[1]; |
301 | 269 |
302 startTime = uprv_getRawUTCtime(); // initialize start timer | 270 startTime = uprv_getRawUTCtime(); // initialize start timer |
303 // set up the watchdog | |
304 install_watchdog(progName, outFileName); | |
305 | 271 |
306 if (options[ARG_ICUDATADIR].doesOccur) { | 272 » if (options[ARG_ICUDATADIR].doesOccur) { |
307 u_setDataDirectory(options[ARG_ICUDATADIR].value); | 273 u_setDataDirectory(options[ARG_ICUDATADIR].value); |
308 } | 274 } |
309 | 275 |
310 const char *copyright = NULL; | 276 const char *copyright = NULL; |
311 if (options[ARG_COPYRIGHT].doesOccur) { | 277 if (options[ARG_COPYRIGHT].doesOccur) { |
312 copyright = U_COPYRIGHT_STRING; | 278 copyright = U_COPYRIGHT_STRING; |
313 } | 279 } |
314 | 280 |
315 if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { | 281 if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { |
316 fprintf(stderr, "you must specify exactly one type of trie to output!\n"
); | 282 fprintf(stderr, "you must specify exactly one type of trie to output!\n"
); |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
475 if (status.isFailure()) { | 441 if (status.isFailure()) { |
476 fprintf(stderr, "gendict: error \"%s\" writing the output file\n", statu
s.errorName()); | 442 fprintf(stderr, "gendict: error \"%s\" writing the output file\n", statu
s.errorName()); |
477 exit(status.reset()); | 443 exit(status.reset()); |
478 } | 444 } |
479 | 445 |
480 if (bytesWritten != (size_t)size) { | 446 if (bytesWritten != (size_t)size) { |
481 fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); | 447 fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); |
482 exit(U_INTERNAL_PROGRAM_ERROR); | 448 exit(U_INTERNAL_PROGRAM_ERROR); |
483 } | 449 } |
484 | 450 |
485 printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()
); | 451 if (!quiet) { printf("%s: done writing\t%s (%ds).\n", progName, outFileName,
elapsedTime()); } |
486 | 452 |
487 #ifdef TEST_GENDICT | 453 #ifdef TEST_GENDICT |
488 if (isBytesTrie) { | 454 if (isBytesTrie) { |
489 BytesTrie::Iterator it(outData, outDataSize, status); | 455 BytesTrie::Iterator it(outData, outDataSize, status); |
490 while (it.hasNext()) { | 456 while (it.hasNext()) { |
491 it.next(status); | 457 it.next(status); |
492 const StringPiece s = it.getString(); | 458 const StringPiece s = it.getString(); |
493 int32_t val = it.getValue(); | 459 int32_t val = it.getValue(); |
494 printf("%s -> %i\n", s.data(), val); | 460 printf("%s -> %i\n", s.data(), val); |
495 } | 461 } |
496 } else { | 462 } else { |
497 UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); | 463 UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); |
498 while (it.hasNext()) { | 464 while (it.hasNext()) { |
499 it.next(status); | 465 it.next(status); |
500 const UnicodeString s = it.getString(); | 466 const UnicodeString s = it.getString(); |
501 int32_t val = it.getValue(); | 467 int32_t val = it.getValue(); |
502 char tmp[1024]; | 468 char tmp[1024]; |
503 s.extract(0, s.length(), tmp, 1024); | 469 s.extract(0, s.length(), tmp, 1024); |
504 printf("%s -> %i\n", tmp, val); | 470 printf("%s -> %i\n", tmp, val); |
505 } | 471 } |
506 } | 472 } |
507 #endif | 473 #endif |
508 | 474 |
509 return 0; | 475 return 0; |
510 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 476 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
511 } | 477 } |
OLD | NEW |