Index: third_party/afl/src/afl-as.c |
diff --git a/third_party/afl/src/afl-as.c b/third_party/afl/src/afl-as.c |
new file mode 100644 |
index 0000000000000000000000000000000000000000..7686fe8a802873467dac47b4571fd96e545521c9 |
--- /dev/null |
+++ b/third_party/afl/src/afl-as.c |
@@ -0,0 +1,544 @@ |
+/* |
+ american fuzzy lop - wrapper for GNU as |
+ --------------------------------------- |
+ |
+ Written and maintained by Michal Zalewski <lcamtuf@google.com> |
+ |
+ Copyright 2013, 2014, 2015 Google Inc. All rights reserved. |
+ |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
+ you may not use this file except in compliance with the License. |
+ You may obtain a copy of the License at: |
+ |
+ http://www.apache.org/licenses/LICENSE-2.0 |
+ |
+ The sole purpose of this wrapper is to preprocess assembly files generated |
+ by GCC / clang and inject the instrumentation bits included from afl-as.h. It |
+ is automatically invoked by the toolchain when compiling programs using |
+ afl-gcc / afl-clang. |
+ |
+ Note that it's an explicit non-goal to instrument hand-written assembly, |
+ be it in separate .s files or in __asm__ blocks. The only aspiration this |
+ utility has right now is to be able to skip them gracefully and allow the |
+ compilation process to continue. |
+ |
+ That said, see experimental/clang_asm_normalize/ for a solution that may |
+ allow clang users to make things work even with hand-crafted assembly. Just |
+ note that there is no equivalent for GCC. |
+ |
+ */ |
+ |
+#define AFL_MAIN |
+ |
+#include "config.h" |
+#include "types.h" |
+#include "debug.h" |
+#include "alloc-inl.h" |
+ |
+#include "afl-as.h" |
+ |
+#include <stdio.h> |
+#include <unistd.h> |
+#include <stdlib.h> |
+#include <string.h> |
+#include <time.h> |
+#include <ctype.h> |
+#include <fcntl.h> |
+ |
+#include <sys/wait.h> |
+#include <sys/time.h> |
+ |
+static u8** as_params; /* Parameters passed to the real 'as' */ |
+ |
+static u8* input_file; /* Originally specified input file */ |
+static u8* modified_file; /* Instrumented file for the real 'as' */ |
+ |
+static u8 be_quiet, /* Quiet mode (no stderr output) */ |
+ clang_mode, /* Running in clang mode? */ |
+ pass_thru, /* Just pass data through? */ |
+ just_version; /* Just show version? */ |
+ |
+static u32 inst_ratio = 100, /* Instrumentation probability (%) */ |
+ as_par_cnt = 1; /* Number of params to 'as' */ |
+ |
+/* If we don't find --32 or --64 in the command line, default to |
+ instrumentation for whichever mode we were compiled with. This is not |
+ perfect, but should do the trick for almost all use cases. */ |
+ |
+#ifdef __x86_64__ |
+ |
+static u8 use_64bit = 1; |
+ |
+#else |
+ |
+static u8 use_64bit = 0; |
+ |
+#ifdef __APPLE__ |
+# error "Sorry, 32-bit Apple platforms are not supported." |
+#endif /* __APPLE__ */ |
+ |
+#endif /* ^__x86_64__ */ |
+ |
+ |
+/* Examine and modify parameters to pass to 'as'. Note that the file name |
+ is always the last parameter passed by GCC, so we exploit this property |
+ to keep the code simple. */ |
+ |
+static void edit_params(int argc, char** argv) { |
+ |
+ u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); |
+ u32 i; |
+ |
+#ifdef __APPLE__ |
+ |
+ u8 use_clang_as = 0; |
+ |
+ /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work |
+ with the code generated by newer versions of clang that are hand-built |
+ by the user. See the thread here: http://goo.gl/HBWDtn. |
+ |
+ To work around this, when using clang and running without AFL_AS |
+ specified, we will actually call 'clang -c' instead of 'as -q' to |
+ compile the assembly file. |
+ |
+ The tools aren't cmdline-compatible, but at least for now, we can |
+ seemingly get away with this by making only very minor tweaks. Thanks |
+ to Nico Weber for the idea. */ |
+ |
+ if (clang_mode && !afl_as) { |
+ |
+ use_clang_as = 1; |
+ |
+ afl_as = getenv("AFL_CC"); |
+ if (!afl_as) afl_as = getenv("AFL_CXX"); |
+ if (!afl_as) afl_as = "clang"; |
+ |
+ } |
+ |
+#endif /* __APPLE__ */ |
+ |
+ /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR |
+ is not set. We need to check these non-standard variables to properly |
+ handle the pass_thru logic later on. */ |
+ |
+ if (!tmp_dir) tmp_dir = getenv("TEMP"); |
+ if (!tmp_dir) tmp_dir = getenv("TMP"); |
+ if (!tmp_dir) tmp_dir = "/tmp"; |
+ |
+ as_params = ck_alloc((argc + 32) * sizeof(u8*)); |
+ |
+ as_params[0] = afl_as ? afl_as : (u8*)"as"; |
+ |
+ as_params[argc] = 0; |
+ |
+ for (i = 1; i < argc - 1; i++) { |
+ |
+ if (!strcmp(argv[i], "--64")) use_64bit = 1; |
+ else if (!strcmp(argv[i], "--32")) use_64bit = 0; |
+ |
+#ifdef __APPLE__ |
+ |
+ /* The Apple case is a bit different... */ |
+ |
+ if (!strcmp(argv[i], "-arch") && i + 1 < argc) { |
+ |
+ if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1; |
+ else if (!strcmp(argv[i + 1], "i386")) |
+ FATAL("Sorry, 32-bit Apple platforms are not supported."); |
+ |
+ } |
+ |
+ /* Strip options that set the preference for a particular upstream |
+ assembler in Xcode. */ |
+ |
+ if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q"))) |
+ continue; |
+ |
+#endif /* __APPLE__ */ |
+ |
+ as_params[as_par_cnt++] = argv[i]; |
+ |
+ } |
+ |
+#ifdef __APPLE__ |
+ |
+ /* When calling clang as the upstream assembler, append -c -x assembler |
+ and hope for the best. */ |
+ |
+ if (use_clang_as) { |
+ |
+ as_params[as_par_cnt++] = "-c"; |
+ as_params[as_par_cnt++] = "-x"; |
+ as_params[as_par_cnt++] = "assembler"; |
+ |
+ } |
+ |
+#endif /* __APPLE__ */ |
+ |
+ input_file = argv[argc - 1]; |
+ |
+ if (input_file[0] == '-') { |
+ |
+ if (!strcmp(input_file + 1, "-version")) { |
+ just_version = 1; |
+ modified_file = input_file; |
+ goto wrap_things_up; |
+ } |
+ |
+ if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)"); |
+ else input_file = NULL; |
+ |
+ } else { |
+ |
+ /* Check if this looks like a standard invocation as a part of an attempt |
+ to compile a program, rather than using gcc on an ad-hoc .s file in |
+ a format we may not understand. This works around an issue compiling |
+ NSS. */ |
+ |
+ if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) && |
+ strncmp(input_file, "/var/tmp/", 9) && |
+ strncmp(input_file, "/tmp/", 5)) pass_thru = 1; |
+ |
+ } |
+ |
+ modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(), |
+ (u32)time(NULL)); |
+ |
+wrap_things_up: |
+ |
+ as_params[as_par_cnt++] = modified_file; |
+ as_params[as_par_cnt] = NULL; |
+ |
+} |
+ |
+ |
+/* Process input file, generate modified_file. Insert instrumentation in all |
+ the appropriate places. */ |
+ |
+static void add_instrumentation(void) { |
+ |
+ static u8 line[MAX_LINE]; |
+ |
+ FILE* inf; |
+ FILE* outf; |
+ s32 outfd; |
+ u32 ins_lines = 0; |
+ |
+ u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, |
+ skip_intel = 0, skip_app = 0, instrument_next = 0; |
+ |
+#ifdef __APPLE__ |
+ |
+ u8* colon_pos; |
+ |
+#endif /* __APPLE__ */ |
+ |
+ if (input_file) { |
+ |
+ inf = fopen(input_file, "r"); |
+ if (!inf) PFATAL("Unable to read '%s'", input_file); |
+ |
+ } else inf = stdin; |
+ |
+ outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600); |
+ |
+ if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file); |
+ |
+ outf = fdopen(outfd, "w"); |
+ |
+ if (!outf) PFATAL("fdopen() failed"); |
+ |
+ while (fgets(line, MAX_LINE, inf)) { |
+ |
+ /* In some cases, we want to defer writing the instrumentation trampoline |
+ until after all the labels, macros, comments, etc. If we're in this |
+ mode, and if the line starts with a tab followed by a character, dump |
+ the trampoline now. */ |
+ |
+ if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok && |
+ instrument_next && line[0] == '\t' && isalpha(line[1])) { |
+ |
+ fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, |
+ R(MAP_SIZE)); |
+ |
+ instrument_next = 0; |
+ ins_lines++; |
+ |
+ } |
+ |
+ /* Output the actual line, call it a day in pass-thru mode. */ |
+ |
+ fputs(line, outf); |
+ |
+ if (pass_thru) continue; |
+ |
+ /* All right, this is where the actual fun begins. For one, we only want to |
+ instrument the .text section. So, let's keep track of that in processed |
+ files - and let's set instr_ok accordingly. */ |
+ |
+ if (line[0] == '\t' && line[1] == '.') { |
+ |
+ /* OpenBSD puts jump tables directly inline with the code, which is |
+ a bit annoying. They use a specific format of p2align directives |
+ around them, so we use that as a signal. */ |
+ |
+ if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) && |
+ isdigit(line[10]) && line[11] == '\n') skip_next_label = 1; |
+ |
+ if (!strncmp(line + 2, "text\n", 5) || |
+ !strncmp(line + 2, "section\t.text", 13) || |
+ !strncmp(line + 2, "section\t__TEXT,__text", 21) || |
+ !strncmp(line + 2, "section __TEXT,__text", 21)) { |
+ instr_ok = 1; |
+ continue; |
+ } |
+ |
+ if (!strncmp(line + 2, "section\t", 8) || |
+ !strncmp(line + 2, "section ", 8) || |
+ !strncmp(line + 2, "bss\n", 4) || |
+ !strncmp(line + 2, "data\n", 5)) { |
+ instr_ok = 0; |
+ continue; |
+ } |
+ |
+ } |
+ |
+ /* Detect off-flavor assembly (rare, happens in gdb). When this is |
+ encountered, we set skip_csect until the opposite directive is |
+ seen, and we do not instrument. */ |
+ |
+ if (strstr(line, ".code")) { |
+ |
+ if (strstr(line, ".code32")) skip_csect = use_64bit; |
+ if (strstr(line, ".code64")) skip_csect = !use_64bit; |
+ |
+ } |
+ |
+ /* Detect syntax changes, as could happen with hand-written assembly. |
+ Skip Intel blocks, resume instrumentation when back to AT&T. */ |
+ |
+ if (strstr(line, ".intel_syntax")) skip_intel = 1; |
+ if (strstr(line, ".att_syntax")) skip_intel = 0; |
+ |
+ /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */ |
+ |
+ if (line[0] == '#' || line[1] == '#') { |
+ |
+ if (strstr(line, "#APP")) skip_app = 1; |
+ if (strstr(line, "#NO_APP")) skip_app = 0; |
+ |
+ } |
+ |
+ /* If we're in the right mood for instrumenting, check for function |
+ names or conditional labels. This is a bit messy, but in essence, |
+ we want to catch: |
+ |
+ ^main: - function entry point (always instrumented) |
+ ^.L0: - GCC branch label |
+ ^.LBB0_0: - clang branch label (but only in clang mode) |
+ ^\tjnz foo - conditional branches |
+ |
+ ...but not: |
+ |
+ ^# BB#0: - clang comments |
+ ^ # BB#0: - ditto |
+ ^.Ltmp0: - clang non-branch labels |
+ ^.LC0 - GCC non-branch labels |
+ ^.LBB0_0: - ditto (when in GCC mode) |
+ ^\tjmp foo - non-conditional jumps |
+ |
+ Additionally, clang and GCC on MacOS X follow a different convention |
+ with no leading dots on labels, hence the weird maze of #ifdefs |
+ later on. |
+ |
+ */ |
+ |
+ if (skip_intel || skip_app || skip_csect || !instr_ok || |
+ line[0] == '#' || line[0] == ' ') continue; |
+ |
+ /* Conditional branch instruction (jnz, etc). We append the instrumentation |
+ right after the branch (to instrument the not-taken path) and at the |
+ branch destination label (handled later on). */ |
+ |
+ if (line[0] == '\t') { |
+ |
+ if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) { |
+ |
+ fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, |
+ R(MAP_SIZE)); |
+ |
+ ins_lines++; |
+ |
+ } |
+ |
+ continue; |
+ |
+ } |
+ |
+ /* Label of some sort. This may be a branch destination, but we need to |
+ tread carefully and account for several different formatting |
+ conventions. */ |
+ |
+#ifdef __APPLE__ |
+ |
+ /* Apple: L<whatever><digit>: */ |
+ |
+ if ((colon_pos = strstr(line, ":"))) { |
+ |
+ if (line[0] == 'L' && isdigit(*(colon_pos - 1))) { |
+ |
+#else |
+ |
+ /* Everybody else: .L<whatever>: */ |
+ |
+ if (strstr(line, ":")) { |
+ |
+ if (line[0] == '.') { |
+ |
+#endif /* __APPLE__ */ |
+ |
+ /* .L0: or LBB0_0: style jump destination */ |
+ |
+#ifdef __APPLE__ |
+ |
+ /* Apple: L<num> / LBB<num> */ |
+ |
+ if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) |
+ && R(100) < inst_ratio) { |
+ |
+#else |
+ |
+ /* Apple: .L<num> / .LBB<num> */ |
+ |
+ if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3))) |
+ && R(100) < inst_ratio) { |
+ |
+#endif /* __APPLE__ */ |
+ |
+ /* An optimization is possible here by adding the code only if the |
+ label is mentioned in the code in contexts other than call / jmp. |
+ That said, this complicates the code by requiring two-pass |
+ processing (messy with stdin), and results in a speed gain |
+ typically under 10%, because compilers are generally pretty good |
+ about not generating spurious intra-function jumps. |
+ |
+ We use deferred output chiefly to avoid disrupting |
+ .Lfunc_begin0-style exception handling calculations (a problem on |
+ MacOS X). */ |
+ |
+ if (!skip_next_label) instrument_next = 1; else skip_next_label = 0; |
+ |
+ } |
+ |
+ } else { |
+ |
+ /* Function label (always instrumented, deferred mode). */ |
+ |
+ instrument_next = 1; |
+ |
+ } |
+ |
+ } |
+ |
+ } |
+ |
+ if (ins_lines) |
+ fputs(use_64bit ? main_payload_64 : main_payload_32, outf); |
+ |
+ if (input_file) fclose(inf); |
+ fclose(outf); |
+ |
+ if (!be_quiet) { |
+ |
+ if (!ins_lines) WARNF("No instrumentation targets found%s.", |
+ pass_thru ? " (pass-thru mode)" : ""); |
+ else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", |
+ ins_lines, use_64bit ? "64" : "32", |
+ getenv("AFL_HARDEN") ? "hardened" : "non-hardened", |
+ inst_ratio); |
+ |
+ } |
+ |
+} |
+ |
+ |
+/* Main entry point */ |
+ |
+int main(int argc, char** argv) { |
+ |
+ s32 pid; |
+ u32 rand_seed; |
+ int status; |
+ u8* inst_ratio_str = getenv("AFL_INST_RATIO"); |
+ |
+ struct timeval tv; |
+ struct timezone tz; |
+ |
+ clang_mode = !!getenv(CLANG_ENV_VAR); |
+ |
+ if (isatty(2) && !getenv("AFL_QUIET")) { |
+ |
+ SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n"); |
+ |
+ } else be_quiet = 1; |
+ |
+ if (argc < 2) { |
+ |
+ SAYF("\n" |
+ "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n" |
+ "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n" |
+ "don't want to run this program directly.\n\n" |
+ |
+ "Rarely, when dealing with extremely complex projects, it may be advisable to\n" |
+ "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n" |
+ "instrumenting every discovered branch.\n\n"); |
+ |
+ exit(1); |
+ |
+ } |
+ |
+ gettimeofday(&tv, &tz); |
+ |
+ rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); |
+ |
+ srandom(rand_seed); |
+ |
+ edit_params(argc, argv); |
+ |
+ if (inst_ratio_str) { |
+ |
+ if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) |
+ FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)"); |
+ |
+ } |
+ |
+ if (getenv(AS_LOOP_ENV_VAR)) |
+ FATAL("Endless loop when calling 'as' (remove '.' from your PATH)"); |
+ |
+ setenv(AS_LOOP_ENV_VAR, "1", 1); |
+ |
+ /* When compiling with ASAN, we don't have a particularly elegant way to skip |
+ ASAN-specific branches. But we can probabilistically compensate for |
+ that... */ |
+ |
+ if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) inst_ratio /= 3; |
+ |
+ if (!just_version) add_instrumentation(); |
+ |
+ if (!(pid = fork())) { |
+ |
+ execvp(as_params[0], (char**)as_params); |
+ FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]); |
+ |
+ } |
+ |
+ if (pid < 0) PFATAL("fork() failed"); |
+ |
+ if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); |
+ |
+ if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file); |
+ |
+ exit(WEXITSTATUS(status)); |
+ |
+} |
+ |