OLD | NEW |
(Empty) | |
| 1 /////////////////////////////////////////////////////////////////////////////// |
| 2 // |
| 3 /// \file args.c |
| 4 /// \brief Argument parsing |
| 5 /// |
| 6 /// \note Filter-specific options parsing is in options.c. |
| 7 // |
| 8 // Author: Lasse Collin |
| 9 // |
| 10 // This file has been put into the public domain. |
| 11 // You can do whatever you want with this file. |
| 12 // |
| 13 /////////////////////////////////////////////////////////////////////////////// |
| 14 |
| 15 #include "private.h" |
| 16 |
| 17 #include "getopt.h" |
| 18 #include <ctype.h> |
| 19 |
| 20 |
// Global option flags. All of them start out disabled and are switched on
// while the arguments are being parsed.

// Set by -c/--stdout and by the *cat command names. Also forced on in
// test mode at the end of args_parse().
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep. Also forced on whenever opt_stdout ends up being set.
bool opt_keep_original = false;

// Set by --robot.
bool opt_robot = false;

// Name used for standard input, e.g. as the --files/--files0 source when
// no filename is given. The string is never modified or free()d, but it
// does get stored into non-const pointers, which is why those assignments
// need a cast.
const char *stdin_filename = "(stdin)";
| 29 |
| 30 |
| 31 static void |
| 32 parse_real(args_info *args, int argc, char **argv) |
| 33 { |
| 34 enum { |
| 35 OPT_X86 = INT_MIN, |
| 36 OPT_POWERPC, |
| 37 OPT_IA64, |
| 38 OPT_ARM, |
| 39 OPT_ARMTHUMB, |
| 40 OPT_SPARC, |
| 41 OPT_DELTA, |
| 42 OPT_LZMA1, |
| 43 OPT_LZMA2, |
| 44 |
| 45 OPT_NO_SPARSE, |
| 46 OPT_FILES, |
| 47 OPT_FILES0, |
| 48 OPT_NO_ADJUST, |
| 49 OPT_INFO_MEMORY, |
| 50 OPT_ROBOT, |
| 51 }; |
| 52 |
| 53 static const char short_opts[] |
| 54 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; |
| 55 |
| 56 static const struct option long_opts[] = { |
| 57 // Operation mode |
| 58 { "compress", no_argument, NULL, 'z' }, |
| 59 { "decompress", no_argument, NULL, 'd' }, |
| 60 { "uncompress", no_argument, NULL, 'd' }, |
| 61 { "test", no_argument, NULL, 't' }, |
| 62 { "list", no_argument, NULL, 'l' }, |
| 63 |
| 64 // Operation modifiers |
| 65 { "keep", no_argument, NULL, 'k' }, |
| 66 { "force", no_argument, NULL, 'f' }, |
| 67 { "stdout", no_argument, NULL, 'c' }, |
| 68 { "to-stdout", no_argument, NULL, 'c' }, |
| 69 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, |
| 70 { "suffix", required_argument, NULL, 'S' }, |
| 71 // { "recursive", no_argument, NULL, 'r' }, // TODO |
| 72 { "files", optional_argument, NULL, OPT_FILES }, |
| 73 { "files0", optional_argument, NULL, OPT_FILES0 }, |
| 74 |
| 75 // Basic compression settings |
| 76 { "format", required_argument, NULL, 'F' }, |
| 77 { "check", required_argument, NULL, 'C' }, |
| 78 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, |
| 79 { "memory", required_argument, NULL, 'M' }, |
| 80 { "threads", required_argument, NULL, 'T' }, |
| 81 |
| 82 { "extreme", no_argument, NULL, 'e' }, |
| 83 { "fast", no_argument, NULL, '0' }, |
| 84 { "best", no_argument, NULL, '9' }, |
| 85 |
| 86 // Filters |
| 87 { "lzma1", optional_argument, NULL, OPT_LZMA1 }, |
| 88 { "lzma2", optional_argument, NULL, OPT_LZMA2 }, |
| 89 { "x86", optional_argument, NULL, OPT_X86 }, |
| 90 { "powerpc", optional_argument, NULL, OPT_POWERPC }, |
| 91 { "ia64", optional_argument, NULL, OPT_IA64 }, |
| 92 { "arm", optional_argument, NULL, OPT_ARM }, |
| 93 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, |
| 94 { "sparc", optional_argument, NULL, OPT_SPARC }, |
| 95 { "delta", optional_argument, NULL, OPT_DELTA }, |
| 96 |
| 97 // Other options |
| 98 { "quiet", no_argument, NULL, 'q' }, |
| 99 { "verbose", no_argument, NULL, 'v' }, |
| 100 { "no-warn", no_argument, NULL, 'Q' }, |
| 101 { "robot", no_argument, NULL, OPT_ROBOT }, |
| 102 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, |
| 103 { "help", no_argument, NULL, 'h' }, |
| 104 { "long-help", no_argument, NULL, 'H' }, |
| 105 { "version", no_argument, NULL, 'V' }, |
| 106 |
| 107 { NULL, 0, NULL, 0 } |
| 108 }; |
| 109 |
| 110 int c; |
| 111 |
| 112 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) |
| 113 != -1) { |
| 114 switch (c) { |
| 115 // Compression preset (also for decompression if --format=raw) |
| 116 case '0': case '1': case '2': case '3': case '4': |
| 117 case '5': case '6': case '7': case '8': case '9': |
| 118 coder_set_preset(c - '0'); |
| 119 break; |
| 120 |
| 121 // --memory |
| 122 case 'M': { |
| 123 // Support specifying the limit as a percentage of |
| 124 // installed physical RAM. |
| 125 size_t len = strlen(optarg); |
| 126 if (len > 0 && optarg[len - 1] == '%') { |
| 127 optarg[len - 1] = '\0'; |
| 128 hardware_memlimit_set_percentage( |
| 129 str_to_uint64( |
| 130 "memory%", optarg, 1, 100)); |
| 131 } else { |
| 132 // On 32-bit systems, SIZE_MAX would make more |
| 133 // sense than UINT64_MAX. But use UINT64_MAX |
| 134 // still so that scripts that assume > 4 GiB |
| 135 // values don't break. |
| 136 hardware_memlimit_set(str_to_uint64( |
| 137 "memory", optarg, |
| 138 0, UINT64_MAX)); |
| 139 } |
| 140 |
| 141 break; |
| 142 } |
| 143 |
| 144 // --suffix |
| 145 case 'S': |
| 146 suffix_set(optarg); |
| 147 break; |
| 148 |
| 149 case 'T': |
| 150 hardware_threadlimit_set(str_to_uint64( |
| 151 "threads", optarg, 0, UINT32_MAX)); |
| 152 break; |
| 153 |
| 154 // --version |
| 155 case 'V': |
| 156 // This doesn't return. |
| 157 message_version(); |
| 158 |
| 159 // --stdout |
| 160 case 'c': |
| 161 opt_stdout = true; |
| 162 break; |
| 163 |
| 164 // --decompress |
| 165 case 'd': |
| 166 opt_mode = MODE_DECOMPRESS; |
| 167 break; |
| 168 |
| 169 // --extreme |
| 170 case 'e': |
| 171 coder_set_extreme(); |
| 172 break; |
| 173 |
| 174 // --force |
| 175 case 'f': |
| 176 opt_force = true; |
| 177 break; |
| 178 |
| 179 // --info-memory |
| 180 case OPT_INFO_MEMORY: |
| 181 // This doesn't return. |
| 182 message_memlimit(); |
| 183 |
| 184 // --help |
| 185 case 'h': |
| 186 // This doesn't return. |
| 187 message_help(false); |
| 188 |
| 189 // --long-help |
| 190 case 'H': |
| 191 // This doesn't return. |
| 192 message_help(true); |
| 193 |
| 194 // --list |
| 195 case 'l': |
| 196 opt_mode = MODE_LIST; |
| 197 break; |
| 198 |
| 199 // --keep |
| 200 case 'k': |
| 201 opt_keep_original = true; |
| 202 break; |
| 203 |
| 204 // --quiet |
| 205 case 'q': |
| 206 message_verbosity_decrease(); |
| 207 break; |
| 208 |
| 209 case 'Q': |
| 210 set_exit_no_warn(); |
| 211 break; |
| 212 |
| 213 case 't': |
| 214 opt_mode = MODE_TEST; |
| 215 break; |
| 216 |
| 217 // --verbose |
| 218 case 'v': |
| 219 message_verbosity_increase(); |
| 220 break; |
| 221 |
| 222 // --robot |
| 223 case OPT_ROBOT: |
| 224 opt_robot = true; |
| 225 |
| 226 // This is to make sure that floating point numbers |
| 227 // always have a dot as decimal separator. |
| 228 setlocale(LC_NUMERIC, "C"); |
| 229 break; |
| 230 |
| 231 case 'z': |
| 232 opt_mode = MODE_COMPRESS; |
| 233 break; |
| 234 |
| 235 // Filter setup |
| 236 |
| 237 case OPT_X86: |
| 238 coder_add_filter(LZMA_FILTER_X86, |
| 239 options_bcj(optarg)); |
| 240 break; |
| 241 |
| 242 case OPT_POWERPC: |
| 243 coder_add_filter(LZMA_FILTER_POWERPC, |
| 244 options_bcj(optarg)); |
| 245 break; |
| 246 |
| 247 case OPT_IA64: |
| 248 coder_add_filter(LZMA_FILTER_IA64, |
| 249 options_bcj(optarg)); |
| 250 break; |
| 251 |
| 252 case OPT_ARM: |
| 253 coder_add_filter(LZMA_FILTER_ARM, |
| 254 options_bcj(optarg)); |
| 255 break; |
| 256 |
| 257 case OPT_ARMTHUMB: |
| 258 coder_add_filter(LZMA_FILTER_ARMTHUMB, |
| 259 options_bcj(optarg)); |
| 260 break; |
| 261 |
| 262 case OPT_SPARC: |
| 263 coder_add_filter(LZMA_FILTER_SPARC, |
| 264 options_bcj(optarg)); |
| 265 break; |
| 266 |
| 267 case OPT_DELTA: |
| 268 coder_add_filter(LZMA_FILTER_DELTA, |
| 269 options_delta(optarg)); |
| 270 break; |
| 271 |
| 272 case OPT_LZMA1: |
| 273 coder_add_filter(LZMA_FILTER_LZMA1, |
| 274 options_lzma(optarg)); |
| 275 break; |
| 276 |
| 277 case OPT_LZMA2: |
| 278 coder_add_filter(LZMA_FILTER_LZMA2, |
| 279 options_lzma(optarg)); |
| 280 break; |
| 281 |
| 282 // Other |
| 283 |
| 284 // --format |
| 285 case 'F': { |
| 286 // Just in case, support both "lzma" and "alone" since |
| 287 // the latter was used for forward compatibility in |
| 288 // LZMA Utils 4.32.x. |
| 289 static const struct { |
| 290 char str[8]; |
| 291 enum format_type format; |
| 292 } types[] = { |
| 293 { "auto", FORMAT_AUTO }, |
| 294 { "xz", FORMAT_XZ }, |
| 295 { "lzma", FORMAT_LZMA }, |
| 296 { "alone", FORMAT_LZMA }, |
| 297 // { "gzip", FORMAT_GZIP }, |
| 298 // { "gz", FORMAT_GZIP }, |
| 299 { "raw", FORMAT_RAW }, |
| 300 }; |
| 301 |
| 302 size_t i = 0; |
| 303 while (strcmp(types[i].str, optarg) != 0) |
| 304 if (++i == ARRAY_SIZE(types)) |
| 305 message_fatal(_("%s: Unknown file " |
| 306 "format type"), |
| 307 optarg); |
| 308 |
| 309 opt_format = types[i].format; |
| 310 break; |
| 311 } |
| 312 |
| 313 // --check |
| 314 case 'C': { |
| 315 static const struct { |
| 316 char str[8]; |
| 317 lzma_check check; |
| 318 } types[] = { |
| 319 { "none", LZMA_CHECK_NONE }, |
| 320 { "crc32", LZMA_CHECK_CRC32 }, |
| 321 { "crc64", LZMA_CHECK_CRC64 }, |
| 322 { "sha256", LZMA_CHECK_SHA256 }, |
| 323 }; |
| 324 |
| 325 size_t i = 0; |
| 326 while (strcmp(types[i].str, optarg) != 0) { |
| 327 if (++i == ARRAY_SIZE(types)) |
| 328 message_fatal(_("%s: Unsupported " |
| 329 "integrity " |
| 330 "check type"), optarg); |
| 331 } |
| 332 |
| 333 // Use a separate check in case we are using different |
| 334 // liblzma than what was used to compile us. |
| 335 if (!lzma_check_is_supported(types[i].check)) |
| 336 message_fatal(_("%s: Unsupported integrity " |
| 337 "check type"), optarg); |
| 338 |
| 339 coder_set_check(types[i].check); |
| 340 break; |
| 341 } |
| 342 |
| 343 case OPT_NO_SPARSE: |
| 344 io_no_sparse(); |
| 345 break; |
| 346 |
| 347 case OPT_FILES: |
| 348 args->files_delim = '\n'; |
| 349 |
| 350 // Fall through |
| 351 |
| 352 case OPT_FILES0: |
| 353 if (args->files_name != NULL) |
| 354 message_fatal(_("Only one file can be " |
| 355 "specified with `--files' " |
| 356 "or `--files0'.")); |
| 357 |
| 358 if (optarg == NULL) { |
| 359 args->files_name = (char *)stdin_filename; |
| 360 args->files_file = stdin; |
| 361 } else { |
| 362 args->files_name = optarg; |
| 363 args->files_file = fopen(optarg, |
| 364 c == OPT_FILES ? "r" : "rb"); |
| 365 if (args->files_file == NULL) |
| 366 message_fatal("%s: %s", optarg, |
| 367 strerror(errno)); |
| 368 } |
| 369 |
| 370 break; |
| 371 |
| 372 case OPT_NO_ADJUST: |
| 373 opt_auto_adjust = false; |
| 374 break; |
| 375 |
| 376 default: |
| 377 message_try_help(); |
| 378 tuklib_exit(E_ERROR, E_ERROR, false); |
| 379 } |
| 380 } |
| 381 |
| 382 return; |
| 383 } |
| 384 |
| 385 |
| 386 static void |
| 387 parse_environment(args_info *args, char *argv0) |
| 388 { |
| 389 char *env = getenv("XZ_OPT"); |
| 390 if (env == NULL) |
| 391 return; |
| 392 |
| 393 // We modify the string, so make a copy of it. |
| 394 env = xstrdup(env); |
| 395 |
| 396 // Calculate the number of arguments in env. argc stats at one |
| 397 // to include space for the program name. |
| 398 int argc = 1; |
| 399 bool prev_was_space = true; |
| 400 for (size_t i = 0; env[i] != '\0'; ++i) { |
| 401 // NOTE: Cast to unsigned char is needed so that correct |
| 402 // value gets passed to isspace(), which expects |
| 403 // unsigned char cast to int. Casting to int is done |
| 404 // automatically due to integer promotion, but we need to |
| 405 // force char to unsigned char manually. Otherwise 8-bit |
| 406 // characters would get promoted to wrong value if |
| 407 // char is signed. |
| 408 if (isspace((unsigned char)env[i])) { |
| 409 prev_was_space = true; |
| 410 } else if (prev_was_space) { |
| 411 prev_was_space = false; |
| 412 |
| 413 // Keep argc small enough to fit into a singed int |
| 414 // and to keep it usable for memory allocation. |
| 415 if (++argc == my_min( |
| 416 INT_MAX, SIZE_MAX / sizeof(char *))) |
| 417 message_fatal(_("The environment variable " |
| 418 "XZ_OPT contains too many " |
| 419 "arguments")); |
| 420 } |
| 421 } |
| 422 |
| 423 // Allocate memory to hold pointers to the arguments. Add one to get |
| 424 // space for the terminating NULL (if some systems happen to need it). |
| 425 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); |
| 426 argv[0] = argv0; |
| 427 argv[argc] = NULL; |
| 428 |
| 429 // Go through the string again. Split the arguments using '\0' |
| 430 // characters and add pointers to the resulting strings to argv. |
| 431 argc = 1; |
| 432 prev_was_space = true; |
| 433 for (size_t i = 0; env[i] != '\0'; ++i) { |
| 434 if (isspace((unsigned char)env[i])) { |
| 435 prev_was_space = true; |
| 436 env[i] = '\0'; |
| 437 } else if (prev_was_space) { |
| 438 prev_was_space = false; |
| 439 argv[argc++] = env + i; |
| 440 } |
| 441 } |
| 442 |
| 443 // Parse the argument list we got from the environment. All non-option |
| 444 // arguments i.e. filenames are ignored. |
| 445 parse_real(args, argc, argv); |
| 446 |
| 447 // Reset the state of the getopt_long() so that we can parse the |
| 448 // command line options too. There are two incompatible ways to |
| 449 // do it. |
| 450 #ifdef HAVE_OPTRESET |
| 451 // BSD |
| 452 optind = 1; |
| 453 optreset = 1; |
| 454 #else |
| 455 // GNU, Solaris |
| 456 optind = 0; |
| 457 #endif |
| 458 |
| 459 // We don't need the argument list from environment anymore. |
| 460 free(argv); |
| 461 free(env); |
| 462 |
| 463 return; |
| 464 } |
| 465 |
| 466 |
| 467 extern void |
| 468 args_parse(args_info *args, int argc, char **argv) |
| 469 { |
| 470 // Initialize those parts of *args that we need later. |
| 471 args->files_name = NULL; |
| 472 args->files_file = NULL; |
| 473 args->files_delim = '\0'; |
| 474 |
| 475 // Check how we were called. |
| 476 { |
| 477 // Remove the leading path name, if any. |
| 478 const char *name = strrchr(argv[0], '/'); |
| 479 if (name == NULL) |
| 480 name = argv[0]; |
| 481 else |
| 482 ++name; |
| 483 |
| 484 // NOTE: It's possible that name[0] is now '\0' if argv[0] |
| 485 // is weird, but it doesn't matter here. |
| 486 |
| 487 // Look for full command names instead of substrings like |
| 488 // "un", "cat", and "lz" to reduce possibility of false |
| 489 // positives when the programs have been renamed. |
| 490 if (strstr(name, "xzcat") != NULL) { |
| 491 opt_mode = MODE_DECOMPRESS; |
| 492 opt_stdout = true; |
| 493 } else if (strstr(name, "unxz") != NULL) { |
| 494 opt_mode = MODE_DECOMPRESS; |
| 495 } else if (strstr(name, "lzcat") != NULL) { |
| 496 opt_format = FORMAT_LZMA; |
| 497 opt_mode = MODE_DECOMPRESS; |
| 498 opt_stdout = true; |
| 499 } else if (strstr(name, "unlzma") != NULL) { |
| 500 opt_format = FORMAT_LZMA; |
| 501 opt_mode = MODE_DECOMPRESS; |
| 502 } else if (strstr(name, "lzma") != NULL) { |
| 503 opt_format = FORMAT_LZMA; |
| 504 } |
| 505 } |
| 506 |
| 507 // First the flags from environment |
| 508 parse_environment(args, argv[0]); |
| 509 |
| 510 // Then from the command line |
| 511 parse_real(args, argc, argv); |
| 512 |
| 513 // Never remove the source file when the destination is not on disk. |
| 514 // In test mode the data is written nowhere, but setting opt_stdout |
| 515 // will make the rest of the code behave well. |
| 516 if (opt_stdout || opt_mode == MODE_TEST) { |
| 517 opt_keep_original = true; |
| 518 opt_stdout = true; |
| 519 } |
| 520 |
| 521 // When compressing, if no --format flag was used, or it |
| 522 // was --format=auto, we compress to the .xz format. |
| 523 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) |
| 524 opt_format = FORMAT_XZ; |
| 525 |
| 526 // Compression settings need to be validated (options themselves and |
| 527 // their memory usage) when compressing to any file format. It has to |
| 528 // be done also when uncompressing raw data, since for raw decoding |
| 529 // the options given on the command line are used to know what kind |
| 530 // of raw data we are supposed to decode. |
| 531 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) |
| 532 coder_set_compression_settings(); |
| 533 |
| 534 // If no filenames are given, use stdin. |
| 535 if (argv[optind] == NULL && args->files_name == NULL) { |
| 536 // We don't modify or free() the "-" constant. The caller |
| 537 // modifies this so don't make the struct itself const. |
| 538 static char *names_stdin[2] = { (char *)"-", NULL }; |
| 539 args->arg_names = names_stdin; |
| 540 args->arg_count = 1; |
| 541 } else { |
| 542 // We got at least one filename from the command line, or |
| 543 // --files or --files0 was specified. |
| 544 args->arg_names = argv + optind; |
| 545 args->arg_count = argc - optind; |
| 546 } |
| 547 |
| 548 return; |
| 549 } |
OLD | NEW |