| Index: third_party/afl/src/afl-cmin
|
| diff --git a/third_party/afl/src/afl-cmin b/third_party/afl/src/afl-cmin
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..5af4b7744c4a28e312eb661b166cd8625b8cd213
|
| --- /dev/null
|
| +++ b/third_party/afl/src/afl-cmin
|
| @@ -0,0 +1,447 @@
|
| +#!/usr/bin/env bash
|
| +#
|
| +# american fuzzy lop - corpus minimization tool
|
| +# ---------------------------------------------
|
| +#
|
| +# Written and maintained by Michal Zalewski <lcamtuf@google.com>
|
| +#
|
| +# Copyright 2014, 2015 Google Inc. All rights reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at:
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# This tool tries to find the smallest subset of files in the input directory
|
| +# that still trigger the full range of instrumentation data points seen in
|
| +# the starting corpus. This has two uses:
|
| +#
|
| +# - Screening large corpora of input files before using them as a seed for
|
| +# afl-fuzz. The tool will remove functionally redundant files and likely
|
| +# leave you with a much smaller set.
|
| +#
|
| +# (In this case, you probably also want to consider running afl-tmin on
|
| +# the individual files later on to reduce their size.)
|
| +#
|
| +# - Minimizing the corpus generated organically by afl-fuzz, perhaps when
|
| +# planning to feed it to more resource-intensive tools. The tool achieves
|
| +# this by removing all entries that used to trigger unique behaviors in the
|
| +# past, but have been made obsolete by later finds.
|
| +#
|
| +# Note that the tool doesn't modify the files themselves. For that, you want
|
| +# afl-tmin.
|
| +#
|
| +# This script must use bash because other shells may have hardcoded limits on
|
| +# array sizes.
|
| +#
|
| +
|
| +echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>"
|
| +echo
|
| +
|
| +#########
|
| +# SETUP #
|
| +#########
|
| +
|
| +# Process command-line options...
|
| +
|
| +# Defaults: 100 MB memory limit for the child process, no run-time limit.
|
| +MEM_LIMIT=100
|
| +TIMEOUT=none
|
| +
|
| +# Start from a clean slate in case any of these leaked in from the caller's
|
| +# environment.
|
| +unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
|
| +      AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
|
| +
|
| +while getopts "+i:o:f:m:t:eQC" opt; do
|
| +
|
| +  case "$opt" in
|
| +
|
| +    "i")
|
| +         IN_DIR="$OPTARG"
|
| +         ;;
|
| +
|
| +    "o")
|
| +         OUT_DIR="$OPTARG"
|
| +         ;;
|
| +    "f")
|
| +         STDIN_FILE="$OPTARG"
|
| +         ;;
|
| +    "m")
|
| +         MEM_LIMIT="$OPTARG"
|
| +         MEM_LIMIT_GIVEN=1
|
| +         ;;
|
| +    "t")
|
| +         TIMEOUT="$OPTARG"
|
| +         ;;
|
| +    "e")
|
| +         EXTRA_PAR="$EXTRA_PAR -e"
|
| +         ;;
|
| +    "C")
|
| +         export AFL_CMIN_CRASHES_ONLY=1
|
| +         ;;
|
| +    # QEMU mode: raise the default memory limit to 250 MB unless the user
|
| +    # supplied an explicit -m.
|
| +    "Q")
|
| +         EXTRA_PAR="$EXTRA_PAR -Q"
|
| +         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
|
| +         QEMU_MODE=1
|
| +         ;;
|
| +    "?")
|
| +         exit 1
|
| +         ;;
|
| +
|
| +  esac
|
| +
|
| +done
|
| +
|
| +shift $((OPTIND-1))
|
| +
|
| +# The first positional argument (after "--") is the target binary; any
|
| +# remaining arguments are passed through to afl-showmap unchanged.
|
| +TARGET_BIN="$1"
|
| +
|
| +# -i, -o and the target binary are all mandatory; otherwise print usage to
|
| +# stderr and quit.
|
| +if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
|
| +
|
| +  cat 1>&2 <<_EOF_
|
| +Usage: $0 [ options ] -- /path/to/target_app [ ... ]
|
| +
|
| +Required parameters:
|
| +
|
| +  -i dir        - input directory with the starting corpus
|
| +  -o dir        - output directory for minimized files
|
| +
|
| +Execution control settings:
|
| +
|
| +  -f file       - location read by the fuzzed program (stdin)
|
| +  -m megs       - memory limit for child process ($MEM_LIMIT MB)
|
| +  -t msec       - run time limit for child process (none)
|
| +  -Q            - use binary-only instrumentation (QEMU mode)
|
| +
|
| +Minimization settings:
|
| +
|
| +  -C            - keep crashing inputs, reject everything else
|
| +  -e            - solve for edge coverage only, ignore hit counts
|
| +
|
| +For additional tips, please consult docs/README.
|
| +
|
| +_EOF_
|
| +  exit 1
|
| +fi
|
| +
|
| +# Do a sanity check to discourage the use of /tmp, since we can't really
|
| +# handle this safely from a shell script.
|
| +
|
| +# grep -q exits 0 on a match, so Tn == "0" below means the corresponding
|
| +# path lives under /tmp or /var/tmp.
|
| +echo "$IN_DIR" | grep -qE '^(/var)?/tmp/'
|
| +T1="$?"
|
| +
|
| +echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/'
|
| +T2="$?"
|
| +
|
| +echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/'
|
| +T3="$?"
|
| +
|
| +echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/'
|
| +T4="$?"
|
| +
|
| +echo "$PWD" | grep -qE '^(/var)?/tmp/'
|
| +T5="$?"
|
| +
|
| +if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
|
| +  echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
|
| +  exit 1
|
| +fi
|
| +
|
| +# If @@ is specified, but there's no -f, let's come up with a temporary input
|
| +# file name.
|
| +
|
| +# Intermediate per-input files are kept in a hidden subdirectory of the
|
| +# output directory.
|
| +TRACE_DIR="$OUT_DIR/.traces"
|
| +
|
| +if [ "$STDIN_FILE" = "" ]; then
|
| +
|
| +  if echo "$*" | grep -qF '@@'; then
|
| +    STDIN_FILE="$TRACE_DIR/.cur_input"
|
| +  fi
|
| +
|
| +fi
|
| +
|
| +# Check for obvious errors.
|
| +
|
| +if [ ! "$MEM_LIMIT" = "none" ]; then
|
| +
|
| +  if [ "$MEM_LIMIT" -lt "5" ]; then
|
| +    echo "[-] Error: dangerously low memory limit." 1>&2
|
| +    exit 1
|
| +  fi
|
| +
|
| +fi
|
| +
|
| +if [ ! "$TIMEOUT" = "none" ]; then
|
| +
|
| +  if [ "$TIMEOUT" -lt "10" ]; then
|
| +    echo "[-] Error: dangerously low timeout." 1>&2
|
| +    exit 1
|
| +  fi
|
| +
|
| +fi
|
| +
|
| +# If the target is not a direct path to an executable, try to resolve it
|
| +# through $PATH.
|
| +if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
|
| +
|
| +  TNEW="`which "$TARGET_BIN" 2>/dev/null`"
|
| +
|
| +  if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
|
| +    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
|
| +    exit 1
|
| +  fi
|
| +
|
| +  TARGET_BIN="$TNEW"
|
| +
|
| +fi
|
| +
|
| +# Unless explicitly skipped (AFL_SKIP_BIN_CHECK) or running under QEMU,
|
| +# require the __AFL_SHM_ID marker string as evidence of afl instrumentation.
|
| +if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
|
| +
|
| +  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
|
| +    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
|
| +    exit 1
|
| +  fi
|
| +
|
| +fi
|
| +
|
| +if [ ! -d "$IN_DIR" ]; then
|
| +  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
|
| +  exit 1
|
| +fi
|
| +
|
| +# When pointed at an afl-fuzz state directory, descend into its queue/.
|
| +test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
|
| +
|
| +# Clear stale output from a previous run: delete old id:* files at the top
|
| +# level of OUT_DIR, nuke the trace directory, then try to rmdir OUT_DIR
|
| +# itself. If OUT_DIR still exists afterwards, it was not empty.
|
| +# Note: -maxdepth is a global option and must precede tests such as -name;
|
| +# GNU find warns (and may misapply it) when it appears afterwards.
|
| +find "$OUT_DIR" -maxdepth 1 -name 'id[:_]*' -exec rm -- {} \; 2>/dev/null
|
| +rm -rf "$TRACE_DIR" 2>/dev/null
|
| +
|
| +rmdir "$OUT_DIR" 2>/dev/null
|
| +
|
| +if [ -d "$OUT_DIR" ]; then
|
| +  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
|
| +  exit 1
|
| +fi
|
| +
|
| +# 0700: trace data may be derived from sensitive inputs; keep it private.
|
| +mkdir -m 700 -p "$TRACE_DIR" || exit 1
|
| +
|
| +# Pre-create the stdin file given via -f (or synthesized for @@) so the
|
| +# target can open it on the first run.
|
| +if [ ! "$STDIN_FILE" = "" ]; then
|
| +  rm -f "$STDIN_FILE" || exit 1
|
| +  touch "$STDIN_FILE" || exit 1
|
| +fi
|
| +
|
| +# Locate afl-showmap next to this script unless AFL_PATH overrides it.
|
| +if [ "$AFL_PATH" = "" ]; then
|
| +  SHOWMAP="${0%/afl-cmin}/afl-showmap"
|
| +else
|
| +  SHOWMAP="$AFL_PATH/afl-showmap"
|
| +fi
|
| +
|
| +if [ ! -x "$SHOWMAP" ]; then
|
| +  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
|
| +  rm -rf "$TRACE_DIR"
|
| +  exit 1
|
| +fi
|
| +
|
| +# $(( )) normalizes the wc -l output (BSD wc pads with spaces).
|
| +IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
|
| +
|
| +if [ "$IN_COUNT" = "0" ]; then
|
| +  echo "No inputs in the target directory - nothing to be done."
|
| +  rm -rf "$TRACE_DIR"
|
| +  exit 1
|
| +fi
|
| +
|
| +FIRST_FILE=`ls "$IN_DIR" | head -1`
|
| +
|
| +# Prefer hard links over copies when the filesystem allows them.
|
| +if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
|
| +  CP_TOOL=ln
|
| +else
|
| +  CP_TOOL=cp
|
| +fi
|
| +
|
| +# Make sure that we can actually get anything out of afl-showmap before we
|
| +# waste too much time.
|
| +
|
| +echo "[*] Testing the target binary..."
|
| +
|
| +# Probe once with the first corpus file. AFL_CMIN_ALLOW_ANY=1 is set for
|
| +# this run only - presumably it tells afl-showmap to keep the trace
|
| +# regardless of the run's outcome; confirm against afl-showmap's docs.
|
| +if [ "$STDIN_FILE" = "" ]; then
|
| +
|
| +  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
|
| +
|
| +else
|
| +
|
| +  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
|
| +  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
|
| +
|
| +fi
|
| +
|
| +# Count non-empty lines in the trace; zero means no instrumentation output.
|
| +FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
|
| +
|
| +if [ "$FIRST_COUNT" -gt "0" ]; then
|
| +
|
| +  echo "[+] OK, $FIRST_COUNT tuples recorded."
|
| +
|
| +else
|
| +
|
| +  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
|
| +  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
|
| +  exit 1
|
| +
|
| +fi
|
| +
|
| +# Let's roll!
|
| +
|
| +#############################
|
| +# STEP 1: COLLECTING TRACES #
|
| +#############################
|
| +
|
| +echo "[*] Obtaining traces for input files in '$IN_DIR'..."
|
| +
|
| +# Runs in a subshell; writes one trace file per input to "$TRACE_DIR/<fn>".
|
| +# File names are fed via process substitution (not a pipe) so the CUR
|
| +# counter updates persist across loop iterations.
|
| +(
|
| +
|
| +  CUR=0
|
| +
|
| +  if [ "$STDIN_FILE" = "" ]; then
|
| +
|
| +    while read -r fn; do
|
| +
|
| +      CUR=$((CUR+1))
|
| +      printf "\\r    Processing file $CUR/$IN_COUNT... "
|
| +
|
| +      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
|
| +
|
| +    done < <(ls "$IN_DIR")
|
| +
|
| +  else
|
| +
|
| +    # Same loop, but the input is copied to the -f location and the target
|
| +    # reads it from there instead of stdin.
|
| +    while read -r fn; do
|
| +
|
| +      CUR=$((CUR+1))
|
| +      printf "\\r    Processing file $CUR/$IN_COUNT... "
|
| +
|
| +      cp "$IN_DIR/$fn" "$STDIN_FILE"
|
| +
|
| +      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
|
| +
|
| +    done < <(ls "$IN_DIR")
|
| +
|
| +
|
| +  fi
|
| +
|
| +)
|
| +
|
| +echo
|
| +
|
| +##########################
|
| +# STEP 2: SORTING TUPLES #
|
| +##########################
|
| +
|
| +# With this out of the way, we sort all tuples by popularity across all
|
| +# datasets. The reasoning here is that we won't be able to avoid the files
|
| +# that trigger unique tuples anyway, so we will want to start with them and
|
| +# see what's left.
|
| +
|
| +echo "[*] Sorting trace sets (this may take a while)..."
|
| +
|
| +# Concatenate every trace file (tr + xargs -0 keeps file names with spaces
|
| +# intact), then count how often each tuple occurs across the whole corpus.
|
| +ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
|
| +  sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq"
|
| +
|
| +TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
|
| +
|
| +echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
|
| +
|
| +#####################################
|
| +# STEP 3: SELECTING CANDIDATE FILES #
|
| +#####################################
|
| +
|
| +# The next step is to find the best candidate for each tuple. The "best"
|
| +# part is understood simply as the smallest input that includes a particular
|
| +# tuple in its trace. Empirical evidence suggests that this produces smaller
|
| +# datasets than more involved algorithms that could be still pulled off in
|
| +# a shell script.
|
| +
|
| +echo "[*] Finding best candidates for each tuple..."
|
| +
|
| +CUR=0
|
| +
|
| +# Emit "tuple fname" pairs; ls -rS orders by size ascending, so smaller
|
| +# files appear first in the candidate list.
|
| +while read -r fn; do
|
| +
|
| +  CUR=$((CUR+1))
|
| +  printf "\\r    Processing file $CUR/$IN_COUNT... "
|
| +
|
| +  sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
|
| +
|
| +done < <(ls -rS "$IN_DIR")
|
| +
|
| +echo
|
| +
|
| +##############################
|
| +# STEP 4: LOADING CANDIDATES #
|
| +##############################
|
| +
|
| +# At this point, we have a file of tuple-file pairs, sorted by file size
|
| +# in ascending order (as a consequence of ls -rS). By doing sort keyed
|
| +# only by tuple (-k 1,1) and configured to output only the first line for
|
| +# every key (-s -u), we end up with the smallest file for each tuple.
|
| +
|
| +echo "[*] Sorting candidate list (be patient)..."
|
| +
|
| +sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
|
| +  sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
|
| +
|
| +# An empty candidate script means no trace data was collected at all.
|
| +if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
|
| +  echo "[-] Error: no traces obtained from test cases, check syntax!"
|
| +  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
|
| +  exit 1
|
| +fi
|
| +
|
| +# The sed command converted the sorted list to a shell script that populates
|
| +# BEST_FILE[tuple]="fname". Let's load that!
|
| +
|
| +. "$TRACE_DIR/.candidate_script"
|
| +
|
| +##########################
|
| +# STEP 5: WRITING OUTPUT #
|
| +##########################
|
| +
|
| +# The final trick is to grab the top pick for each tuple, unless said tuple is
|
| +# already set due to the inclusion of an earlier candidate; and then put all
|
| +# tuples associated with the newly-added file to the "already have" list. The
|
| +# loop works from least popular tuples and toward the most common ones.
|
| +
|
| +echo "[*] Processing candidates and writing output files..."
|
| +
|
| +CUR=0
|
| +
|
| +touch "$TRACE_DIR/.already_have"
|
| +
|
| +# .all_uniq lines are "<count> <tuple>"; the count is read but unused.
|
| +while read -r cnt tuple; do
|
| +
|
| +  CUR=$((CUR+1))
|
| +  printf "\\r    Processing tuple $CUR/$TUPLE_COUNT... "
|
| +
|
| +  # If we already have this tuple, skip it.
|
| +
|
| +  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
|
| +
|
| +  # BEST_FILE was populated by the candidate script sourced in step 4.
|
| +  FN=${BEST_FILE[tuple]}
|
| +
|
| +  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
|
| +
|
| +  # Deduplicate the "already have" list only every 5th file; plain append
|
| +  # is used the rest of the time, which is cheaper.
|
| +  if [ "$((CUR % 5))" = "0" ]; then
|
| +    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
|
| +    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
|
| +  else
|
| +    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
|
| +  fi
|
| +
|
| +done <"$TRACE_DIR/.all_uniq"
|
| +
|
| +echo
|
| +
|
| +# Normalize the count through arithmetic expansion, same as IN_COUNT above:
|
| +# BSD wc pads its output with leading spaces, which would break the string
|
| +# comparison against "1" below.
|
| +OUT_COUNT=$((`ls -- "$OUT_DIR" | wc -l`))
|
| +
|
| +if [ "$OUT_COUNT" = "1" ]; then
|
| +  echo "[!] WARNING: All test cases had the same traces, check syntax!"
|
| +fi
|
| +
|
| +echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
|
| +echo
|
| +
|
| +# Keep intermediate traces around only if the user asked for them.
|
| +test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
|
| +
|
| +exit 0
|
|
|