OLD | NEW |
(Empty) | |
| 1 /* |
| 2 american fuzzy lop - wrapper for GNU as |
| 3 --------------------------------------- |
| 4 |
| 5 Written and maintained by Michal Zalewski <lcamtuf@google.com> |
| 6 |
| 7 Copyright 2013, 2014, 2015 Google Inc. All rights reserved. |
| 8 |
| 9 Licensed under the Apache License, Version 2.0 (the "License"); |
| 10 you may not use this file except in compliance with the License. |
| 11 You may obtain a copy of the License at: |
| 12 |
| 13 http://www.apache.org/licenses/LICENSE-2.0 |
| 14 |
| 15 The sole purpose of this wrapper is to preprocess assembly files generated |
| 16 by GCC / clang and inject the instrumentation bits included from afl-as.h. It |
| 17 is automatically invoked by the toolchain when compiling programs using |
| 18 afl-gcc / afl-clang. |
| 19 |
| 20 Note that it's an explicit non-goal to instrument hand-written assembly, |
| 21 be it in separate .s files or in __asm__ blocks. The only aspiration this |
| 22 utility has right now is to be able to skip them gracefully and allow the |
| 23 compilation process to continue. |
| 24 |
| 25 That said, see experimental/clang_asm_normalize/ for a solution that may |
| 26 allow clang users to make things work even with hand-crafted assembly. Just |
| 27 note that there is no equivalent for GCC. |
| 28 |
| 29 */ |
| 30 |
| 31 #define AFL_MAIN |
| 32 |
| 33 #include "config.h" |
| 34 #include "types.h" |
| 35 #include "debug.h" |
| 36 #include "alloc-inl.h" |
| 37 |
| 38 #include "afl-as.h" |
| 39 |
| 40 #include <stdio.h> |
| 41 #include <unistd.h> |
| 42 #include <stdlib.h> |
| 43 #include <string.h> |
| 44 #include <time.h> |
| 45 #include <ctype.h> |
| 46 #include <fcntl.h> |
| 47 |
| 48 #include <sys/wait.h> |
| 49 #include <sys/time.h> |
| 50 |
| 51 static u8** as_params; /* Parameters passed to the real 'as' */ |
| 52 |
| 53 static u8* input_file; /* Originally specified input file */ |
| 54 static u8* modified_file; /* Instrumented file for the real 'as' */ |
| 55 |
| 56 static u8 be_quiet, /* Quiet mode (no stderr output) */ |
| 57 clang_mode, /* Running in clang mode? */ |
| 58 pass_thru, /* Just pass data through? */ |
| 59 just_version; /* Just show version? */ |
| 60 |
| 61 static u32 inst_ratio = 100, /* Instrumentation probability (%) */ |
| 62 as_par_cnt = 1; /* Number of params to 'as' */ |
| 63 |
| 64 /* If we don't find --32 or --64 in the command line, default to |
| 65 instrumentation for whichever mode we were compiled with. This is not |
| 66 perfect, but should do the trick for almost all use cases. */ |
| 67 |
| 68 #ifdef __x86_64__ |
| 69 |
| 70 static u8 use_64bit = 1; |
| 71 |
| 72 #else |
| 73 |
| 74 static u8 use_64bit = 0; |
| 75 |
| 76 #ifdef __APPLE__ |
| 77 # error "Sorry, 32-bit Apple platforms are not supported." |
| 78 #endif /* __APPLE__ */ |
| 79 |
| 80 #endif /* ^__x86_64__ */ |
| 81 |
| 82 |
| 83 /* Examine and modify parameters to pass to 'as'. Note that the file name |
| 84 is always the last parameter passed by GCC, so we exploit this property |
| 85 to keep the code simple. */ |
| 86 |
| 87 static void edit_params(int argc, char** argv) { |
| 88 |
| 89 u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); |
| 90 u32 i; |
| 91 |
| 92 #ifdef __APPLE__ |
| 93 |
| 94 u8 use_clang_as = 0; |
| 95 |
| 96 /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work |
| 97 with the code generated by newer versions of clang that are hand-built |
| 98 by the user. See the thread here: http://goo.gl/HBWDtn. |
| 99 |
| 100 To work around this, when using clang and running without AFL_AS |
| 101 specified, we will actually call 'clang -c' instead of 'as -q' to |
| 102 compile the assembly file. |
| 103 |
| 104 The tools aren't cmdline-compatible, but at least for now, we can |
| 105 seemingly get away with this by making only very minor tweaks. Thanks |
| 106 to Nico Weber for the idea. */ |
| 107 |
| 108 if (clang_mode && !afl_as) { |
| 109 |
| 110 use_clang_as = 1; |
| 111 |
| 112 afl_as = getenv("AFL_CC"); |
| 113 if (!afl_as) afl_as = getenv("AFL_CXX"); |
| 114 if (!afl_as) afl_as = "clang"; |
| 115 |
| 116 } |
| 117 |
| 118 #endif /* __APPLE__ */ |
| 119 |
| 120 /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR |
| 121 is not set. We need to check these non-standard variables to properly |
| 122 handle the pass_thru logic later on. */ |
| 123 |
| 124 if (!tmp_dir) tmp_dir = getenv("TEMP"); |
| 125 if (!tmp_dir) tmp_dir = getenv("TMP"); |
| 126 if (!tmp_dir) tmp_dir = "/tmp"; |
| 127 |
| 128 as_params = ck_alloc((argc + 32) * sizeof(u8*)); |
| 129 |
| 130 as_params[0] = afl_as ? afl_as : (u8*)"as"; |
| 131 |
| 132 as_params[argc] = 0; |
| 133 |
| 134 for (i = 1; i < argc - 1; i++) { |
| 135 |
| 136 if (!strcmp(argv[i], "--64")) use_64bit = 1; |
| 137 else if (!strcmp(argv[i], "--32")) use_64bit = 0; |
| 138 |
| 139 #ifdef __APPLE__ |
| 140 |
| 141 /* The Apple case is a bit different... */ |
| 142 |
| 143 if (!strcmp(argv[i], "-arch") && i + 1 < argc) { |
| 144 |
| 145 if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1; |
| 146 else if (!strcmp(argv[i + 1], "i386")) |
| 147 FATAL("Sorry, 32-bit Apple platforms are not supported."); |
| 148 |
| 149 } |
| 150 |
| 151 /* Strip options that set the preference for a particular upstream |
| 152 assembler in Xcode. */ |
| 153 |
| 154 if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q"))) |
| 155 continue; |
| 156 |
| 157 #endif /* __APPLE__ */ |
| 158 |
| 159 as_params[as_par_cnt++] = argv[i]; |
| 160 |
| 161 } |
| 162 |
| 163 #ifdef __APPLE__ |
| 164 |
| 165 /* When calling clang as the upstream assembler, append -c -x assembler |
| 166 and hope for the best. */ |
| 167 |
| 168 if (use_clang_as) { |
| 169 |
| 170 as_params[as_par_cnt++] = "-c"; |
| 171 as_params[as_par_cnt++] = "-x"; |
| 172 as_params[as_par_cnt++] = "assembler"; |
| 173 |
| 174 } |
| 175 |
| 176 #endif /* __APPLE__ */ |
| 177 |
| 178 input_file = argv[argc - 1]; |
| 179 |
| 180 if (input_file[0] == '-') { |
| 181 |
| 182 if (!strcmp(input_file + 1, "-version")) { |
| 183 just_version = 1; |
| 184 modified_file = input_file; |
| 185 goto wrap_things_up; |
| 186 } |
| 187 |
| 188 if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)"); |
| 189 else input_file = NULL; |
| 190 |
| 191 } else { |
| 192 |
| 193 /* Check if this looks like a standard invocation as a part of an attempt |
| 194 to compile a program, rather than using gcc on an ad-hoc .s file in |
| 195 a format we may not understand. This works around an issue compiling |
| 196 NSS. */ |
| 197 |
| 198 if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) && |
| 199 strncmp(input_file, "/var/tmp/", 9) && |
| 200 strncmp(input_file, "/tmp/", 5)) pass_thru = 1; |
| 201 |
| 202 } |
| 203 |
| 204 modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(), |
| 205 (u32)time(NULL)); |
| 206 |
| 207 wrap_things_up: |
| 208 |
| 209 as_params[as_par_cnt++] = modified_file; |
| 210 as_params[as_par_cnt] = NULL; |
| 211 |
| 212 } |
| 213 |
| 214 |
| 215 /* Process input file, generate modified_file. Insert instrumentation in all |
| 216 the appropriate places. */ |
| 217 |
| 218 static void add_instrumentation(void) { |
| 219 |
| 220 static u8 line[MAX_LINE]; |
| 221 |
| 222 FILE* inf; |
| 223 FILE* outf; |
| 224 s32 outfd; |
| 225 u32 ins_lines = 0; |
| 226 |
| 227 u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, |
| 228 skip_intel = 0, skip_app = 0, instrument_next = 0; |
| 229 |
| 230 #ifdef __APPLE__ |
| 231 |
| 232 u8* colon_pos; |
| 233 |
| 234 #endif /* __APPLE__ */ |
| 235 |
| 236 if (input_file) { |
| 237 |
| 238 inf = fopen(input_file, "r"); |
| 239 if (!inf) PFATAL("Unable to read '%s'", input_file); |
| 240 |
| 241 } else inf = stdin; |
| 242 |
| 243 outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600); |
| 244 |
| 245 if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file); |
| 246 |
| 247 outf = fdopen(outfd, "w"); |
| 248 |
| 249 if (!outf) PFATAL("fdopen() failed"); |
| 250 |
| 251 while (fgets(line, MAX_LINE, inf)) { |
| 252 |
| 253 /* In some cases, we want to defer writing the instrumentation trampoline |
| 254 until after all the labels, macros, comments, etc. If we're in this |
| 255 mode, and if the line starts with a tab followed by a character, dump |
| 256 the trampoline now. */ |
| 257 |
| 258 if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok && |
| 259 instrument_next && line[0] == '\t' && isalpha(line[1])) { |
| 260 |
| 261 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, |
| 262 R(MAP_SIZE)); |
| 263 |
| 264 instrument_next = 0; |
| 265 ins_lines++; |
| 266 |
| 267 } |
| 268 |
| 269 /* Output the actual line, call it a day in pass-thru mode. */ |
| 270 |
| 271 fputs(line, outf); |
| 272 |
| 273 if (pass_thru) continue; |
| 274 |
| 275 /* All right, this is where the actual fun begins. For one, we only want to |
| 276 instrument the .text section. So, let's keep track of that in processed |
| 277 files - and let's set instr_ok accordingly. */ |
| 278 |
| 279 if (line[0] == '\t' && line[1] == '.') { |
| 280 |
| 281 /* OpenBSD puts jump tables directly inline with the code, which is |
| 282 a bit annoying. They use a specific format of p2align directives |
| 283 around them, so we use that as a signal. */ |
| 284 |
| 285 if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) && |
| 286 isdigit(line[10]) && line[11] == '\n') skip_next_label = 1; |
| 287 |
| 288 if (!strncmp(line + 2, "text\n", 5) || |
| 289 !strncmp(line + 2, "section\t.text", 13) || |
| 290 !strncmp(line + 2, "section\t__TEXT,__text", 21) || |
| 291 !strncmp(line + 2, "section __TEXT,__text", 21)) { |
| 292 instr_ok = 1; |
| 293 continue; |
| 294 } |
| 295 |
| 296 if (!strncmp(line + 2, "section\t", 8) || |
| 297 !strncmp(line + 2, "section ", 8) || |
| 298 !strncmp(line + 2, "bss\n", 4) || |
| 299 !strncmp(line + 2, "data\n", 5)) { |
| 300 instr_ok = 0; |
| 301 continue; |
| 302 } |
| 303 |
| 304 } |
| 305 |
| 306 /* Detect off-flavor assembly (rare, happens in gdb). When this is |
| 307 encountered, we set skip_csect until the opposite directive is |
| 308 seen, and we do not instrument. */ |
| 309 |
| 310 if (strstr(line, ".code")) { |
| 311 |
| 312 if (strstr(line, ".code32")) skip_csect = use_64bit; |
| 313 if (strstr(line, ".code64")) skip_csect = !use_64bit; |
| 314 |
| 315 } |
| 316 |
| 317 /* Detect syntax changes, as could happen with hand-written assembly. |
| 318 Skip Intel blocks, resume instrumentation when back to AT&T. */ |
| 319 |
| 320 if (strstr(line, ".intel_syntax")) skip_intel = 1; |
| 321 if (strstr(line, ".att_syntax")) skip_intel = 0; |
| 322 |
| 323 /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */ |
| 324 |
| 325 if (line[0] == '#' || line[1] == '#') { |
| 326 |
| 327 if (strstr(line, "#APP")) skip_app = 1; |
| 328 if (strstr(line, "#NO_APP")) skip_app = 0; |
| 329 |
| 330 } |
| 331 |
| 332 /* If we're in the right mood for instrumenting, check for function |
| 333 names or conditional labels. This is a bit messy, but in essence, |
| 334 we want to catch: |
| 335 |
| 336 ^main: - function entry point (always instrumented) |
| 337 ^.L0: - GCC branch label |
| 338 ^.LBB0_0: - clang branch label (but only in clang mode) |
| 339 ^\tjnz foo - conditional branches |
| 340 |
| 341 ...but not: |
| 342 |
| 343 ^# BB#0: - clang comments |
| 344 ^ # BB#0: - ditto |
| 345 ^.Ltmp0: - clang non-branch labels |
| 346 ^.LC0 - GCC non-branch labels |
| 347 ^.LBB0_0: - ditto (when in GCC mode) |
| 348 ^\tjmp foo - non-conditional jumps |
| 349 |
| 350 Additionally, clang and GCC on MacOS X follow a different convention |
| 351 with no leading dots on labels, hence the weird maze of #ifdefs |
| 352 later on. |
| 353 |
| 354 */ |
| 355 |
| 356 if (skip_intel || skip_app || skip_csect || !instr_ok || |
| 357 line[0] == '#' || line[0] == ' ') continue; |
| 358 |
| 359 /* Conditional branch instruction (jnz, etc). We append the instrumentation |
| 360 right after the branch (to instrument the not-taken path) and at the |
| 361 branch destination label (handled later on). */ |
| 362 |
| 363 if (line[0] == '\t') { |
| 364 |
| 365 if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) { |
| 366 |
| 367 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, |
| 368 R(MAP_SIZE)); |
| 369 |
| 370 ins_lines++; |
| 371 |
| 372 } |
| 373 |
| 374 continue; |
| 375 |
| 376 } |
| 377 |
| 378 /* Label of some sort. This may be a branch destination, but we need to |
| 379 tread carefully and account for several different formatting |
| 380 conventions. */ |
| 381 |
| 382 #ifdef __APPLE__ |
| 383 |
| 384 /* Apple: L<whatever><digit>: */ |
| 385 |
| 386 if ((colon_pos = strstr(line, ":"))) { |
| 387 |
| 388 if (line[0] == 'L' && isdigit(*(colon_pos - 1))) { |
| 389 |
| 390 #else |
| 391 |
| 392 /* Everybody else: .L<whatever>: */ |
| 393 |
| 394 if (strstr(line, ":")) { |
| 395 |
| 396 if (line[0] == '.') { |
| 397 |
| 398 #endif /* __APPLE__ */ |
| 399 |
| 400 /* .L0: or LBB0_0: style jump destination */ |
| 401 |
| 402 #ifdef __APPLE__ |
| 403 |
| 404 /* Apple: L<num> / LBB<num> */ |
| 405 |
| 406 if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) |
| 407 && R(100) < inst_ratio) { |
| 408 |
| 409 #else |
| 410 |
| 411 /* Apple: .L<num> / .LBB<num> */ |
| 412 |
| 413 if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3))) |
| 414 && R(100) < inst_ratio) { |
| 415 |
| 416 #endif /* __APPLE__ */ |
| 417 |
| 418 /* An optimization is possible here by adding the code only if the |
| 419 label is mentioned in the code in contexts other than call / jmp. |
| 420 That said, this complicates the code by requiring two-pass |
| 421 processing (messy with stdin), and results in a speed gain |
| 422 typically under 10%, because compilers are generally pretty good |
| 423 about not generating spurious intra-function jumps. |
| 424 |
| 425 We use deferred output chiefly to avoid disrupting |
| 426 .Lfunc_begin0-style exception handling calculations (a problem on |
| 427 MacOS X). */ |
| 428 |
| 429 if (!skip_next_label) instrument_next = 1; else skip_next_label = 0; |
| 430 |
| 431 } |
| 432 |
| 433 } else { |
| 434 |
| 435 /* Function label (always instrumented, deferred mode). */ |
| 436 |
| 437 instrument_next = 1; |
| 438 |
| 439 } |
| 440 |
| 441 } |
| 442 |
| 443 } |
| 444 |
| 445 if (ins_lines) |
| 446 fputs(use_64bit ? main_payload_64 : main_payload_32, outf); |
| 447 |
| 448 if (input_file) fclose(inf); |
| 449 fclose(outf); |
| 450 |
| 451 if (!be_quiet) { |
| 452 |
| 453 if (!ins_lines) WARNF("No instrumentation targets found%s.", |
| 454 pass_thru ? " (pass-thru mode)" : ""); |
| 455 else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", |
| 456 ins_lines, use_64bit ? "64" : "32", |
| 457 getenv("AFL_HARDEN") ? "hardened" : "non-hardened", |
| 458 inst_ratio); |
| 459 |
| 460 } |
| 461 |
| 462 } |
| 463 |
| 464 |
| 465 /* Main entry point */ |
| 466 |
| 467 int main(int argc, char** argv) { |
| 468 |
| 469 s32 pid; |
| 470 u32 rand_seed; |
| 471 int status; |
| 472 u8* inst_ratio_str = getenv("AFL_INST_RATIO"); |
| 473 |
| 474 struct timeval tv; |
| 475 struct timezone tz; |
| 476 |
| 477 clang_mode = !!getenv(CLANG_ENV_VAR); |
| 478 |
| 479 if (isatty(2) && !getenv("AFL_QUIET")) { |
| 480 |
| 481 SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n"); |
| 482 |
| 483 } else be_quiet = 1; |
| 484 |
| 485 if (argc < 2) { |
| 486 |
| 487 SAYF("\n" |
| 488 "This is a helper application for afl-fuzz. It is a wrapper around GNU
'as',\n" |
| 489 "executed by the toolchain whenever using afl-gcc or afl-clang. You pro
bably\n" |
| 490 "don't want to run this program directly.\n\n" |
| 491 |
| 492 "Rarely, when dealing with extremely complex projects, it may be advisa
ble to\n" |
| 493 "set AFL_INST_RATIO to a value less than 100 in order to reduce the odd
s of\n" |
| 494 "instrumenting every discovered branch.\n\n"); |
| 495 |
| 496 exit(1); |
| 497 |
| 498 } |
| 499 |
| 500 gettimeofday(&tv, &tz); |
| 501 |
| 502 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); |
| 503 |
| 504 srandom(rand_seed); |
| 505 |
| 506 edit_params(argc, argv); |
| 507 |
| 508 if (inst_ratio_str) { |
| 509 |
| 510 if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) |
| 511 FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)"); |
| 512 |
| 513 } |
| 514 |
| 515 if (getenv(AS_LOOP_ENV_VAR)) |
| 516 FATAL("Endless loop when calling 'as' (remove '.' from your PATH)"); |
| 517 |
| 518 setenv(AS_LOOP_ENV_VAR, "1", 1); |
| 519 |
| 520 /* When compiling with ASAN, we don't have a particularly elegant way to skip |
| 521 ASAN-specific branches. But we can probabilistically compensate for |
| 522 that... */ |
| 523 |
| 524 if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) inst_ratio /= 3; |
| 525 |
| 526 if (!just_version) add_instrumentation(); |
| 527 |
| 528 if (!(pid = fork())) { |
| 529 |
| 530 execvp(as_params[0], (char**)as_params); |
| 531 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]); |
| 532 |
| 533 } |
| 534 |
| 535 if (pid < 0) PFATAL("fork() failed"); |
| 536 |
| 537 if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); |
| 538 |
| 539 if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file); |
| 540 |
| 541 exit(WEXITSTATUS(status)); |
| 542 |
| 543 } |
| 544 |
OLD | NEW |