Index: third_party/afl/src/afl-cmin |
diff --git a/third_party/afl/src/afl-cmin b/third_party/afl/src/afl-cmin |
new file mode 100755 |
index 0000000000000000000000000000000000000000..5af4b7744c4a28e312eb661b166cd8625b8cd213 |
--- /dev/null |
+++ b/third_party/afl/src/afl-cmin |
@@ -0,0 +1,473 @@
+#!/usr/bin/env bash |
+# |
+# american fuzzy lop - corpus minimization tool |
+# --------------------------------------------- |
+# |
+# Written and maintained by Michal Zalewski <lcamtuf@google.com> |
+# |
+# Copyright 2014, 2015 Google Inc. All rights reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at: |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# This tool tries to find the smallest subset of files in the input directory |
+# that still trigger the full range of instrumentation data points seen in |
+# the starting corpus. This has two uses: |
+# |
+# - Screening large corpora of input files before using them as a seed for |
+# afl-fuzz. The tool will remove functionally redundant files and likely |
+# leave you with a much smaller set. |
+# |
+# (In this case, you probably also want to consider running afl-tmin on |
+# the individual files later on to reduce their size.) |
+# |
+# - Minimizing the corpus generated organically by afl-fuzz, perhaps when |
+# planning to feed it to more resource-intensive tools. The tool achieves |
+#   this by removing all entries that triggered unique behaviors in the past
+#   but have since been made obsolete by later finds.
+# |
+# Note that the tool doesn't modify the files themselves. For that, you want |
+# afl-tmin. |
+# |
+# This script must use bash because other shells may have hardcoded limits on |
+# array sizes. |
+# |
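+# Example invocation (paths and target name are illustrative):
+#
+#   afl-cmin -i seed_corpus/ -o seed_corpus.min/ -- ./target_app @@
+#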
+ |
+echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>" |
+echo |
+ |
+######### |
+# SETUP # |
+######### |
+ |
+# Process command-line options... |
+ |
+MEM_LIMIT=100 |
+TIMEOUT=none |
+ |
+unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ |
+ AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE |
+ |
+while getopts "+i:o:f:m:t:eQC" opt; do |
+ |
+ case "$opt" in |
+ |
+ "i") |
+ IN_DIR="$OPTARG" |
+ ;; |
+ |
+ "o") |
+ OUT_DIR="$OPTARG" |
+ ;; |
+ "f") |
+ STDIN_FILE="$OPTARG" |
+ ;; |
+ "m") |
+ MEM_LIMIT="$OPTARG" |
+ MEM_LIMIT_GIVEN=1 |
+ ;; |
+ "t") |
+ TIMEOUT="$OPTARG" |
+ ;; |
+ "e") |
+ EXTRA_PAR="$EXTRA_PAR -e" |
+ ;; |
+ "C") |
+ export AFL_CMIN_CRASHES_ONLY=1 |
+ ;; |
+ "Q") |
+ EXTRA_PAR="$EXTRA_PAR -Q" |
+ test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 |
+ QEMU_MODE=1 |
+ ;; |
+ "?") |
+ exit 1 |
+ ;; |
+ |
+ esac |
+ |
+done |
+ |
+shift $((OPTIND-1)) |
+ |
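+# Everything left after the options is the target binary plus its arguments;
+# the afl-showmap invocations below receive it verbatim as "$@".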
+TARGET_BIN="$1" |
+ |
+if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then |
+ |
+ cat 1>&2 <<_EOF_ |
+Usage: $0 [ options ] -- /path/to/target_app [ ... ] |
+ |
+Required parameters: |
+ |
+ -i dir - input directory with the starting corpus |
+ -o dir - output directory for minimized files |
+ |
+Execution control settings: |
+ |
+ -f file - location read by the fuzzed program (stdin) |
+ -m megs - memory limit for child process ($MEM_LIMIT MB) |
+ -t msec - run time limit for child process (none) |
+ -Q - use binary-only instrumentation (QEMU mode) |
+ |
+Minimization settings: |
+ |
+ -C - keep crashing inputs, reject everything else |
+ -e - solve for edge coverage only, ignore hit counts |
+ |
+For additional tips, please consult docs/README. |
+ |
+_EOF_ |
+ exit 1 |
+fi |
+ |
+# Do a sanity check to discourage the use of /tmp, since we can't really |
+# handle this safely from a shell script. |
+ |
+echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' |
+T1="$?" |
+ |
+echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' |
+T2="$?" |
+ |
+echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' |
+T3="$?" |
+ |
+echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' |
+T4="$?" |
+ |
+echo "$PWD" | grep -qE '^(/var)?/tmp/' |
+T5="$?" |
+ |
+if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then |
+ echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 |
+ exit 1 |
+fi |
+ |
+# If @@ is specified, but there's no -f, let's come up with a temporary input |
+# file name. |
+ |
+TRACE_DIR="$OUT_DIR/.traces" |
+ |
+if [ "$STDIN_FILE" = "" ]; then |
+ |
+ if echo "$*" | grep -qF '@@'; then |
+ STDIN_FILE="$TRACE_DIR/.cur_input" |
+ fi |
+ |
+fi |
+ |
+# Check for obvious errors. |
+ |
+if [ ! "$MEM_LIMIT" = "none" ]; then |
+ |
+ if [ "$MEM_LIMIT" -lt "5" ]; then |
+ echo "[-] Error: dangerously low memory limit." 1>&2 |
+ exit 1 |
+ fi |
+ |
+fi |
+ |
+if [ ! "$TIMEOUT" = "none" ]; then |
+ |
+ if [ "$TIMEOUT" -lt "10" ]; then |
+ echo "[-] Error: dangerously low timeout." 1>&2 |
+ exit 1 |
+ fi |
+ |
+fi |
+ |
+if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then |
+ |
+ TNEW="`which "$TARGET_BIN" 2>/dev/null`" |
+ |
+ if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then |
+ echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 |
+ exit 1 |
+ fi |
+ |
+ TARGET_BIN="$TNEW" |
+ |
+fi |
+ |
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then |
+ |
+ if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then |
+ echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 |
+ exit 1 |
+ fi |
+ |
+fi |
+ |
+if [ ! -d "$IN_DIR" ]; then |
+ echo "[-] Error: directory '$IN_DIR' not found." 1>&2 |
+ exit 1 |
+fi |
+ |
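+# If pointed at an afl-fuzz output directory, descend into its queue/
+# subdirectory, where the actual test cases live.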
+test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" |
+ |
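+# Scrub files left over from a previous afl-cmin run, then try to remove the
+# output directory itself; if it survives the rmdir, it was not empty.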
+find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null |
+rm -rf "$TRACE_DIR" 2>/dev/null |
+ |
+rmdir "$OUT_DIR" 2>/dev/null |
+ |
+if [ -d "$OUT_DIR" ]; then |
+ echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 |
+ exit 1 |
+fi |
+ |
+mkdir -m 700 -p "$TRACE_DIR" || exit 1 |
+ |
+if [ ! "$STDIN_FILE" = "" ]; then |
+ rm -f "$STDIN_FILE" || exit 1 |
+ touch "$STDIN_FILE" || exit 1 |
+fi |
+ |
+if [ "$AFL_PATH" = "" ]; then |
+ SHOWMAP="${0%/afl-cmin}/afl-showmap" |
+else |
+ SHOWMAP="$AFL_PATH/afl-showmap" |
+fi |
+ |
+if [ ! -x "$SHOWMAP" ]; then |
+ echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 |
+ rm -rf "$TRACE_DIR" |
+ exit 1 |
+fi |
+ |
+IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) |
+ |
+if [ "$IN_COUNT" = "0" ]; then |
+ echo "No inputs in the target directory - nothing to be done." |
+ rm -rf "$TRACE_DIR" |
+ exit 1 |
+fi |
+ |
+FIRST_FILE=`ls "$IN_DIR" | head -1` |
+ |
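+# Prefer hard links when placing files in the output directory - they are
+# fast and take no extra space; fall back to cp if linking fails (e.g., when
+# the input and output directories are on different filesystems).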
+if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then |
+ CP_TOOL=ln |
+else |
+ CP_TOOL=cp |
+fi |
+ |
+# Make sure that we can actually get anything out of afl-showmap before we |
+# waste too much time. |
+ |
+echo "[*] Testing the target binary..." |
+ |
+if [ "$STDIN_FILE" = "" ]; then |
+ |
+ AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" |
+ |
+else |
+ |
+ cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" |
+ AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null |
+ |
+fi |
+ |
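+# Every non-empty line in the afl-showmap output file is a single tuple.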
+FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`)) |
+ |
+if [ "$FIRST_COUNT" -gt "0" ]; then |
+ |
+ echo "[+] OK, $FIRST_COUNT tuples recorded." |
+ |
+else |
+ |
+ echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2 |
+ test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" |
+ exit 1 |
+ |
+fi |
+ |
+# Let's roll! |
+ |
+############################# |
+# STEP 1: COLLECTING TRACES # |
+############################# |
+ |
+echo "[*] Obtaining traces for input files in '$IN_DIR'..." |
+ |
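+# Collect one trace per input: every file is run through afl-showmap, and its
+# trace is stored under the same name in $TRACE_DIR.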
+( |
+ |
+ CUR=0 |
+ |
+ if [ "$STDIN_FILE" = "" ]; then |
+ |
+ while read -r fn; do |
+ |
+ CUR=$((CUR+1)) |
+ printf "\\r Processing file $CUR/$IN_COUNT... " |
+ |
+ "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" |
+ |
+ done < <(ls "$IN_DIR") |
+ |
+ else |
+ |
+ while read -r fn; do |
+ |
+ CUR=$((CUR+1)) |
+ printf "\\r Processing file $CUR/$IN_COUNT... " |
+ |
+ cp "$IN_DIR/$fn" "$STDIN_FILE" |
+ |
+ "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null |
+ |
+ done < <(ls "$IN_DIR") |
+ |
+ |
+ fi |
+ |
+) |
+ |
+echo |
+ |
+########################## |
+# STEP 2: SORTING TUPLES # |
+########################## |
+ |
+# With this out of the way, we sort all tuples by popularity across all |
+# datasets. The reasoning here is that we won't be able to avoid the files |
+# that trigger unique tuples anyway, so we will want to start with them and |
+# see what's left. |
+ |
+echo "[*] Sorting trace sets (this may take a while)..." |
+ |
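+# Concatenate all per-file traces, count how many inputs hit each tuple
+# (uniq -c), and order the tuples from rarest to most common (sort -n).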
+ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \ |
+ sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq" |
+ |
+TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) |
+ |
+echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." |
+ |
+##################################### |
+# STEP 3: SELECTING CANDIDATE FILES # |
+##################################### |
+ |
+# The next step is to find the best candidate for each tuple. The "best"
+# candidate is understood simply as the smallest input that includes a
+# particular tuple in its trace. Empirical evidence suggests that this
+# produces smaller datasets than more involved algorithms that could still
+# be pulled off in a shell script.
+ |
+echo "[*] Finding best candidates for each tuple..." |
+ |
+CUR=0 |
+ |
+while read -r fn; do |
+ |
+ CUR=$((CUR+1)) |
+ printf "\\r Processing file $CUR/$IN_COUNT... " |
+ |
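+  # Tag every tuple in this file's trace with the file name, producing
+  # "tuple fname" pairs; ls -rS feeds the files smallest-first.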
+ sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" |
+ |
+done < <(ls -rS "$IN_DIR") |
+ |
+echo |
+ |
+############################## |
+# STEP 4: LOADING CANDIDATES # |
+############################## |
+ |
+# At this point, we have a file of tuple-file pairs, sorted by file size
+# in ascending order (a consequence of ls -rS). By running a stable sort
+# (-s) keyed only by the tuple (-k 1,1) and outputting just the first line
+# for every key (-u), we keep the smallest file for each tuple.
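+#
+# For example (tuple value illustrative), the pair "002301 foo.bin" becomes:
+#
+#   BEST_FILE[002301]="foo.bin"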
+ |
+echo "[*] Sorting candidate list (be patient)..." |
+ |
+sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ |
+ sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" |
+ |
+if [ ! -s "$TRACE_DIR/.candidate_script" ]; then |
+ echo "[-] Error: no traces obtained from test cases, check syntax!" |
+ test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" |
+ exit 1 |
+fi |
+ |
+# The sed command converted the sorted list to a shell script that populates |
+# BEST_FILE[tuple]="fname". Let's load that! |
+ |
+. "$TRACE_DIR/.candidate_script" |
+ |
+########################## |
+# STEP 5: WRITING OUTPUT # |
+########################## |
+ |
+# The final trick is to grab the top pick for each tuple, unless that tuple
+# is already covered by an earlier candidate, and then add all tuples
+# associated with the newly-added file to the "already have" list. The loop
+# works from the least popular tuples toward the most common ones.
+ |
+echo "[*] Processing candidates and writing output files..." |
+ |
+CUR=0 |
+ |
+touch "$TRACE_DIR/.already_have" |
+ |
+while read -r cnt tuple; do |
+ |
+ CUR=$((CUR+1)) |
+ printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " |
+ |
+ # If we already have this tuple, skip it. |
+ |
+ grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue |
+ |
+ FN=${BEST_FILE[tuple]} |
+ |
+ $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" |
+ |
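+  # Periodically re-sort and deduplicate the "already have" list so that the
+  # grep above stays fast; plain appends are cheaper in between.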
+ if [ "$((CUR % 5))" = "0" ]; then |
+ sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" |
+ mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" |
+ else |
+ cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" |
+ fi |
+ |
+done <"$TRACE_DIR/.all_uniq" |
+ |
+echo |
+ |
+OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` |
+ |
+if [ "$OUT_COUNT" = "1" ]; then |
+ echo "[!] WARNING: All test cases had the same traces, check syntax!" |
+fi |
+ |
+echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." |
+echo |
+ |
+test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" |
+ |
+exit 0 |