OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2005-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: icupkg.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2005jul29 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * This tool operates on ICU data (.dat package) files. |
| 17 * It takes one as input, or creates an empty one, and can remove, add, and |
| 18 * extract data pieces according to command-line options. |
| 19 * At the same time, it swaps each piece to a consistent set of platform |
| 20 * properties as desired. |
| 21 * Useful as an install-time tool for shipping only one flavor of ICU data |
| 22 * and preparing data files for the target platform. |
| 23 * Also for customizing ICU data (pruning, augmenting, replacing) and for |
| 24 * taking it apart. |
| 25 * Subsumes functionality and implementation code from |
| 26 * gencmn, decmn, and icuswap tools. |
| 27 * Will not work with data DLLs (shared libraries). |
| 28 */ |
| 29 |
| 30 #include "unicode/utypes.h" |
| 31 #include "unicode/putil.h" |
| 32 #include "cstring.h" |
| 33 #include "toolutil.h" |
| 34 #include "uoptions.h" |
| 35 #include "uparse.h" |
| 36 #include "filestrm.h" |
| 37 #include "package.h" |
| 38 #include "pkg_icu.h" |
| 39 |
| 40 #include <stdio.h> |
| 41 #include <stdlib.h> |
| 42 #include <string.h> |
| 43 |
| 44 U_NAMESPACE_USE |
| 45 |
| 46 // TODO: add --matchmode=regex for using the ICU regex engine for item name patt
ern matching? |
| 47 |
| 48 // general definitions ----------------------------------------------------- *** |
| 49 |
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression in any context (e.g. as an operand of sizeof or next to
 * postfix operators).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
| 51 |
| 52 // main() ------------------------------------------------------------------ *** |
| 53 |
| 54 static void |
| 55 printUsage(const char *pname, UBool isHelp) { |
| 56 FILE *where=isHelp ? stdout : stderr; |
| 57 |
| 58 fprintf(where, |
| 59 "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n" |
| 60 "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n" |
| 61 "\t[-s path] [-d path] [-w] [-m mode]\n" |
| 62 "\tinfilename [outfilename]\n", |
| 63 isHelp ? 'U' : 'u', pname); |
| 64 if(isHelp) { |
| 65 fprintf(where, |
| 66 "\n" |
| 67 "Read the input ICU .dat package file, modify it according to the op
tions,\n" |
| 68 "swap it to the desired platform properties (charset & endianness),\
n" |
| 69 "and optionally write the resulting ICU .dat package to the output f
ile.\n" |
| 70 "Items are removed, then added, then extracted and listed.\n" |
| 71 "An ICU .dat package is written if items are removed or added,\n" |
| 72 "or if the input and output filenames differ,\n" |
| 73 "or if the --writepkg (-w) option is set.\n"); |
| 74 fprintf(where, |
| 75 "\n" |
| 76 "If the input filename is \"new\" then an empty package is created.\
n" |
| 77 "If the output filename is missing, then it is automatically generat
ed\n" |
| 78 "from the input filename: If the input filename ends with an l, b, o
r e\n" |
| 79 "matching its platform properties, then the output filename will\n" |
| 80 "contain the letter from the -t (--type) option.\n"); |
| 81 fprintf(where, |
| 82 "\n" |
| 83 "This tool can also be used to just swap a single ICU data file, rep
lacing the\n" |
| 84 "former icuswap tool. For this mode, provide the infilename (and opt
ional\n" |
| 85 "outfilename) for a non-package ICU data file.\n" |
| 86 "Allowed options include -t, -w, -s and -d.\n" |
| 87 "The filenames can be absolute, or relative to the source/dest dir p
aths.\n" |
| 88 "Other options are not allowed in this mode.\n"); |
| 89 fprintf(where, |
| 90 "\n" |
| 91 "Options:\n" |
| 92 "\t(Only the last occurrence of an option is used.)\n" |
| 93 "\n" |
| 94 "\t-h or -? or --help print this message and exit\n"); |
| 95 fprintf(where, |
| 96 "\n" |
| 97 "\t-tl or --type l output for little-endian/ASCII charset family\n
" |
| 98 "\t-tb or --type b output for big-endian/ASCII charset family\n" |
| 99 "\t-te or --type e output for big-endian/EBCDIC charset family\n" |
| 100 "\t The output type defaults to the input type.\n" |
| 101 "\n" |
| 102 "\t-c or --copyright include the ICU copyright notice\n" |
| 103 "\t-C comment or --comment comment include a comment string\n"); |
| 104 fprintf(where, |
| 105 "\n" |
| 106 "\t-a list or --add list add items to the package\n" |
| 107 "\t-r list or --remove list remove items from the package\n" |
| 108 "\t-x list or --extract list extract items from the package\n" |
| 109 "\tThe list can be a single item's filename,\n" |
| 110 "\tor a .txt filename with a list of item filenames,\n" |
| 111 "\tor an ICU .dat package filename.\n"); |
| 112 fprintf(where, |
| 113 "\n" |
| 114 "\t-w or --writepkg write the output package even if no items are r
emoved\n" |
| 115 "\t or added (e.g., for only swapping the data)\n")
; |
| 116 fprintf(where, |
| 117 "\n" |
| 118 "\t-m mode or --matchmode mode set the matching mode for item names
with\n" |
| 119 "\t wildcards\n" |
| 120 "\t noslash: the '*' wildcard does not match the '/' tree sep
arator\n"); |
| 121 /* |
| 122 * Usage text columns, starting after the initial TAB. |
| 123 * 1 2 3 4 5 6 7
8 |
| 124 * 90123456789012345678901234567890123456789012345678901234567890123
4567890 |
| 125 */ |
| 126 fprintf(where, |
| 127 "\n" |
| 128 "\tList file syntax: Items are listed on one or more lines and separ
ated\n" |
| 129 "\tby whitespace (space+tab).\n" |
| 130 "\tComments begin with # and are ignored. Empty lines are ignored.\n
" |
| 131 "\tLines where the first non-whitespace character is one of %s\n" |
| 132 "\tare also ignored, to reserve for future syntax.\n", |
| 133 U_PKG_RESERVED_CHARS); |
| 134 fprintf(where, |
| 135 "\tItems for removal or extraction may contain a single '*' wildcard
\n" |
| 136 "\tcharacter. The '*' matches zero or more characters.\n" |
| 137 "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n" |
| 138 "\tdoes not match '/'.\n"); |
| 139 fprintf(where, |
| 140 "\n" |
| 141 "\tItems must be listed relative to the package, and the --sourcedir
or\n" |
| 142 "\tthe --destdir path will be prepended.\n" |
| 143 "\tThe paths are only prepended to item filenames while adding or\n" |
| 144 "\textracting items, not to ICU .dat package or list filenames.\n" |
| 145 "\t\n" |
| 146 "\tPaths may contain '/' instead of the platform's\n" |
| 147 "\tfile separator character, and are converted as appropriate.\n"); |
| 148 fprintf(where, |
| 149 "\n" |
| 150 "\t-s path or --sourcedir path directory for the --add items\n" |
| 151 "\t-d path or --destdir path directory for the --extract items\n" |
| 152 "\n" |
| 153 "\t-l or --list list the package items to stdout or
to output list file\n" |
| 154 "\t (after modifying the package)\n"); |
| 155 } |
| 156 } |
| 157 |
| 158 static UOption options[]={ |
| 159 UOPTION_HELP_H, |
| 160 UOPTION_HELP_QUESTION_MARK, |
| 161 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG), |
| 162 |
| 163 UOPTION_COPYRIGHT, |
| 164 UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG), |
| 165 |
| 166 UOPTION_SOURCEDIR, |
| 167 UOPTION_DESTDIR, |
| 168 |
| 169 UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG), |
| 170 |
| 171 UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG), |
| 172 |
| 173 UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG), |
| 174 UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG), |
| 175 UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG), |
| 176 |
| 177 UOPTION_DEF("list", 'l', UOPT_NO_ARG), |
| 178 |
| 179 UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG) |
| 180 }; |
| 181 |
/*
 * Indexes into the options[] table, one per command-line option.
 * The constants are listed in the same order as the options[] entries;
 * OPT_COUNT is the number of options.
 */
enum {
    /* help */
    OPT_HELP_H=0,
    OPT_HELP_QUESTION_MARK,

    /* output platform type */
    OPT_OUT_TYPE,

    /* package comment */
    OPT_COPYRIGHT,
    OPT_COMMENT,

    /* directories */
    OPT_SOURCEDIR,
    OPT_DESTDIR,

    /* force writing the output */
    OPT_WRITEPKG,

    /* wildcard matching mode */
    OPT_MATCHMODE,

    /* item lists */
    OPT_ADD_LIST,
    OPT_REMOVE_LIST,
    OPT_EXTRACT_LIST,

    /* listing of the package contents */
    OPT_LIST_ITEMS,
    OPT_LIST_FILE,

    OPT_COUNT
};
| 207 |
| 208 static UBool |
| 209 isPackageName(const char *filename) { |
| 210 int32_t len; |
| 211 |
| 212 len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */ |
| 213 return (UBool)(len>0 && 0==strcmp(filename+len, ".dat")); |
| 214 } |
/*
 * Required for MinGW, which otherwise globs the command-line arguments
 * incorrectly: an escaped \* would be expanded into a list of files
 * instead of being passed through as a literal "*".
 */
int _CRT_glob = 0;
| 220 |
| 221 extern int |
| 222 main(int argc, char *argv[]) { |
| 223 const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outCo
mment; |
| 224 char outType; |
| 225 UBool isHelp, isModified, isPackage; |
| 226 int result = 0; |
| 227 |
| 228 Package *pkg, *listPkg, *addListPkg; |
| 229 |
| 230 U_MAIN_INIT_ARGS(argc, argv); |
| 231 |
| 232 /* get the program basename */ |
| 233 pname=findBasename(argv[0]); |
| 234 |
| 235 argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
| 236 isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].does
Occur; |
| 237 if(isHelp) { |
| 238 printUsage(pname, TRUE); |
| 239 return U_ZERO_ERROR; |
| 240 } |
| 241 if(argc<2 || 3<argc) { |
| 242 printUsage(pname, FALSE); |
| 243 return U_ILLEGAL_ARGUMENT_ERROR; |
| 244 } |
| 245 |
| 246 pkg=new Package; |
| 247 if(pkg==NULL) { |
| 248 fprintf(stderr, "icupkg: not enough memory\n"); |
| 249 return U_MEMORY_ALLOCATION_ERROR; |
| 250 } |
| 251 isModified=FALSE; |
| 252 |
| 253 if(options[OPT_SOURCEDIR].doesOccur) { |
| 254 sourcePath=options[OPT_SOURCEDIR].value; |
| 255 } else { |
| 256 // work relative to the current working directory |
| 257 sourcePath=NULL; |
| 258 } |
| 259 if(options[OPT_DESTDIR].doesOccur) { |
| 260 destPath=options[OPT_DESTDIR].value; |
| 261 } else { |
| 262 // work relative to the current working directory |
| 263 destPath=NULL; |
| 264 } |
| 265 |
| 266 if(0==strcmp(argv[1], "new")) { |
| 267 inFilename=NULL; |
| 268 isPackage=TRUE; |
| 269 } else { |
| 270 inFilename=argv[1]; |
| 271 if(isPackageName(inFilename)) { |
| 272 pkg->readPackage(inFilename); |
| 273 isPackage=TRUE; |
| 274 } else { |
| 275 /* swap a single file (icuswap replacement) rather than work on a pa
ckage */ |
| 276 pkg->addFile(sourcePath, inFilename); |
| 277 isPackage=FALSE; |
| 278 } |
| 279 } |
| 280 |
| 281 if(argc>=3) { |
| 282 outFilename=argv[2]; |
| 283 if(0!=strcmp(argv[1], argv[2])) { |
| 284 isModified=TRUE; |
| 285 } |
| 286 } else if(isPackage) { |
| 287 outFilename=NULL; |
| 288 } else /* !isPackage */ { |
| 289 outFilename=inFilename; |
| 290 isModified=(UBool)(sourcePath!=destPath); |
| 291 } |
| 292 |
| 293 /* parse the output type option */ |
| 294 if(options[OPT_OUT_TYPE].doesOccur) { |
| 295 const char *type=options[OPT_OUT_TYPE].value; |
| 296 if(type[0]==0 || type[1]!=0) { |
| 297 /* the type must be exactly one letter */ |
| 298 printUsage(pname, FALSE); |
| 299 return U_ILLEGAL_ARGUMENT_ERROR; |
| 300 } |
| 301 outType=type[0]; |
| 302 switch(outType) { |
| 303 case 'l': |
| 304 case 'b': |
| 305 case 'e': |
| 306 break; |
| 307 default: |
| 308 printUsage(pname, FALSE); |
| 309 return U_ILLEGAL_ARGUMENT_ERROR; |
| 310 } |
| 311 |
| 312 /* |
| 313 * Set the isModified flag if the output type differs from the |
| 314 * input package type. |
| 315 * If we swap a single file, just assume that we are modifying it. |
| 316 * The Package class does not give us access to the item and its type. |
| 317 */ |
| 318 isModified|=(UBool)(!isPackage || outType!=pkg->getInType()); |
| 319 } else if(isPackage) { |
| 320 outType=pkg->getInType(); // default to input type |
| 321 } else /* !isPackage: swap single file */ { |
| 322 outType=0; /* tells extractItem() to not swap */ |
| 323 } |
| 324 |
| 325 if(options[OPT_WRITEPKG].doesOccur) { |
| 326 isModified=TRUE; |
| 327 } |
| 328 |
| 329 if(!isPackage) { |
| 330 /* |
| 331 * icuswap tool replacement: Only swap a single file. |
| 332 * Check that irrelevant options are not set. |
| 333 */ |
| 334 if( options[OPT_COMMENT].doesOccur || |
| 335 options[OPT_COPYRIGHT].doesOccur || |
| 336 options[OPT_MATCHMODE].doesOccur || |
| 337 options[OPT_REMOVE_LIST].doesOccur || |
| 338 options[OPT_ADD_LIST].doesOccur || |
| 339 options[OPT_EXTRACT_LIST].doesOccur || |
| 340 options[OPT_LIST_ITEMS].doesOccur |
| 341 ) { |
| 342 printUsage(pname, FALSE); |
| 343 return U_ILLEGAL_ARGUMENT_ERROR; |
| 344 } |
| 345 if(isModified) { |
| 346 pkg->extractItem(destPath, outFilename, 0, outType); |
| 347 } |
| 348 |
| 349 delete pkg; |
| 350 return result; |
| 351 } |
| 352 |
| 353 /* Work with a package. */ |
| 354 |
| 355 if(options[OPT_COMMENT].doesOccur) { |
| 356 outComment=options[OPT_COMMENT].value; |
| 357 } else if(options[OPT_COPYRIGHT].doesOccur) { |
| 358 outComment=U_COPYRIGHT_STRING; |
| 359 } else { |
| 360 outComment=NULL; |
| 361 } |
| 362 |
| 363 if(options[OPT_MATCHMODE].doesOccur) { |
| 364 if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) { |
| 365 pkg->setMatchMode(Package::MATCH_NOSLASH); |
| 366 } else { |
| 367 printUsage(pname, FALSE); |
| 368 return U_ILLEGAL_ARGUMENT_ERROR; |
| 369 } |
| 370 } |
| 371 |
| 372 /* remove items */ |
| 373 if(options[OPT_REMOVE_LIST].doesOccur) { |
| 374 listPkg=readList(NULL, options[OPT_REMOVE_LIST].value, FALSE); |
| 375 if(listPkg!=NULL) { |
| 376 pkg->removeItems(*listPkg); |
| 377 delete listPkg; |
| 378 isModified=TRUE; |
| 379 } else { |
| 380 printUsage(pname, FALSE); |
| 381 return U_ILLEGAL_ARGUMENT_ERROR; |
| 382 } |
| 383 } |
| 384 |
| 385 /* |
| 386 * add items |
| 387 * use a separate Package so that its memory and items stay around |
| 388 * as long as the main Package |
| 389 */ |
| 390 addListPkg=NULL; |
| 391 if(options[OPT_ADD_LIST].doesOccur) { |
| 392 addListPkg=readList(sourcePath, options[OPT_ADD_LIST].value, TRUE); |
| 393 if(addListPkg!=NULL) { |
| 394 pkg->addItems(*addListPkg); |
| 395 // delete addListPkg; deferred until after writePackage() |
| 396 isModified=TRUE; |
| 397 } else { |
| 398 printUsage(pname, FALSE); |
| 399 return U_ILLEGAL_ARGUMENT_ERROR; |
| 400 } |
| 401 } |
| 402 |
| 403 /* extract items */ |
| 404 if(options[OPT_EXTRACT_LIST].doesOccur) { |
| 405 listPkg=readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE); |
| 406 if(listPkg!=NULL) { |
| 407 pkg->extractItems(destPath, *listPkg, outType); |
| 408 delete listPkg; |
| 409 } else { |
| 410 printUsage(pname, FALSE); |
| 411 return U_ILLEGAL_ARGUMENT_ERROR; |
| 412 } |
| 413 } |
| 414 |
| 415 /* list items */ |
| 416 if(options[OPT_LIST_ITEMS].doesOccur) { |
| 417 int32_t i; |
| 418 if (options[OPT_LIST_FILE].doesOccur) { |
| 419 FileStream *out; |
| 420 out = T_FileStream_open(options[OPT_LIST_FILE].value, "w"); |
| 421 if (out != NULL) { |
| 422 for(i=0; i<pkg->getItemCount(); ++i) { |
| 423 T_FileStream_writeLine(out, pkg->getItem(i)->name); |
| 424 T_FileStream_writeLine(out, "\n"); |
| 425 } |
| 426 T_FileStream_close(out); |
| 427 } else { |
| 428 return U_ILLEGAL_ARGUMENT_ERROR; |
| 429 } |
| 430 } else { |
| 431 for(i=0; i<pkg->getItemCount(); ++i) { |
| 432 fprintf(stdout, "%s\n", pkg->getItem(i)->name); |
| 433 } |
| 434 } |
| 435 } |
| 436 |
| 437 /* check dependencies between items */ |
| 438 if(!pkg->checkDependencies()) { |
| 439 /* some dependencies are not fulfilled */ |
| 440 return U_MISSING_RESOURCE_ERROR; |
| 441 } |
| 442 |
| 443 /* write the output .dat package if there are any modifications */ |
| 444 if(isModified) { |
| 445 char outFilenameBuffer[1024]; // for auto-generated output filename, if
necessary |
| 446 |
| 447 if(outFilename==NULL || outFilename[0]==0) { |
| 448 if(inFilename==NULL || inFilename[0]==0) { |
| 449 fprintf(stderr, "icupkg: unable to auto-generate an output filen
ame if there is no input filename\n"); |
| 450 exit(U_ILLEGAL_ARGUMENT_ERROR); |
| 451 } |
| 452 |
| 453 /* |
| 454 * auto-generate a filename: |
| 455 * copy the inFilename, |
| 456 * and if the last basename character matches the input file's type, |
| 457 * then replace it with the output file's type |
| 458 */ |
| 459 char suffix[6]="?.dat"; |
| 460 char *s; |
| 461 |
| 462 suffix[0]=pkg->getInType(); |
| 463 strcpy(outFilenameBuffer, inFilename); |
| 464 s=strchr(outFilenameBuffer, 0); |
| 465 if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) { |
| 466 *(s-5)=outType; |
| 467 } |
| 468 outFilename=outFilenameBuffer; |
| 469 } |
| 470 result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, o
utType); |
| 471 } |
| 472 |
| 473 delete addListPkg; |
| 474 delete pkg; |
| 475 return result; |
| 476 } |
| 477 |
| 478 /* |
| 479 * Hey, Emacs, please set the following: |
| 480 * |
| 481 * Local Variables: |
| 482 * indent-tabs-mode: nil |
| 483 * End: |
| 484 * |
| 485 */ |
OLD | NEW |