OLD | NEW |
(Empty) | |
#!/usr/bin/env bash
#
# american fuzzy lop - corpus minimization tool
# ---------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2014, 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
#   - Screening large corpora of input files before using them as a seed for
#     afl-fuzz. The tool will remove functionally redundant files and likely
#     leave you with a much smaller set.
#
#     (In this case, you probably also want to consider running afl-tmin on
#     the individual files later on to reduce their size.)
#
#   - Minimizing the corpus generated organically by afl-fuzz, perhaps when
#     planning to feed it to more resource-intensive tools. The tool achieves
#     this by removing all entries that used to trigger unique behaviors in the
#     past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
#

echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>"
echo

#########
# SETUP #
#########

# Process command-line options...

MEM_LIMIT=100
TIMEOUT=none

unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE

while getopts "+i:o:f:m:t:eQC" opt; do

  case "$opt" in

    "i")
      IN_DIR="$OPTARG"
      ;;

    "o")
      OUT_DIR="$OPTARG"
      ;;

    "f")
      STDIN_FILE="$OPTARG"
      ;;

    "m")
      MEM_LIMIT="$OPTARG"
      MEM_LIMIT_GIVEN=1
      ;;

    "t")
      TIMEOUT="$OPTARG"
      ;;

    "e")
      EXTRA_PAR="$EXTRA_PAR -e"
      ;;

    "C")
      export AFL_CMIN_CRASHES_ONLY=1
      ;;

    "Q")
      EXTRA_PAR="$EXTRA_PAR -Q"
      # QEMU mode needs more headroom; only bump the limit if the user
      # did not set one explicitly with -m.
      test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
      QEMU_MODE=1
      ;;

    "?")
      exit 1
      ;;

  esac

done

shift $((OPTIND-1))

TARGET_BIN="$1"

if [[ "$TARGET_BIN" = "" || "$IN_DIR" = "" || "$OUT_DIR" = "" ]]; then

  cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]

Required parameters:

  -i dir        - input directory with the starting corpus
  -o dir        - output directory for minimized files

Execution control settings:

  -f file       - location read by the fuzzed program (stdin)
  -m megs       - memory limit for child process ($MEM_LIMIT MB)
  -t msec       - run time limit for child process (none)
  -Q            - use binary-only instrumentation (QEMU mode)

Minimization settings:

  -C            - keep crashing inputs, reject everything else
  -e            - solve for edge coverage only, ignore hit counts

For additional tips, please consult docs/README.

_EOF_
  exit 1
fi

# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script.

# Succeeds (returns 0) if the given path lives under /tmp or /var/tmp.
in_tmp_dir() {
  echo "$1" | grep -qE '^(/var)?/tmp/'
}

if in_tmp_dir "$IN_DIR" || in_tmp_dir "$TARGET_BIN" || in_tmp_dir "$OUT_DIR" || \
   in_tmp_dir "$STDIN_FILE" || in_tmp_dir "$PWD"; then
  echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
  exit 1
fi

# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.

TRACE_DIR="$OUT_DIR/.traces"

if [ "$STDIN_FILE" = "" ]; then

  if echo "$*" | grep -qF '@@'; then
    STDIN_FILE="$TRACE_DIR/.cur_input"
  fi

fi

# Check for obvious errors.

if [[ ! "$MEM_LIMIT" = "none" ]]; then

  if [ "$MEM_LIMIT" -lt "5" ]; then
    echo "[-] Error: dangerously low memory limit." 1>&2
    exit 1
  fi

fi

if [[ ! "$TIMEOUT" = "none" ]]; then

  if [ "$TIMEOUT" -lt "10" ]; then
    echo "[-] Error: dangerously low timeout." 1>&2
    exit 1
  fi

fi

if [[ ! -f "$TARGET_BIN" || ! -x "$TARGET_BIN" ]]; then

  # Not a usable path - fall back to a $PATH lookup ('command -v' is the
  # portable replacement for 'which').
  TNEW="$(command -v "$TARGET_BIN" 2>/dev/null)"

  if [[ ! -f "$TNEW" || ! -x "$TNEW" ]]; then
    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
    exit 1
  fi

  TARGET_BIN="$TNEW"

fi

if [[ "$AFL_SKIP_BIN_CHECK" = "" && "$QEMU_MODE" = "" ]]; then

  # Instrumented binaries reference the shared-memory environment variable;
  # its absence means the target was not built with afl-gcc/afl-clang.
  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
    exit 1
  fi

fi

if [ ! -d "$IN_DIR" ]; then
  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
  exit 1
fi

test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"

# Clean up any leftovers from a previous run. Note that -maxdepth is a
# positional option and must precede tests such as -name.
find "$OUT_DIR" -maxdepth 1 -name 'id[:_]*' -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null

rmdir "$OUT_DIR" 2>/dev/null

if [ -d "$OUT_DIR" ]; then
  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
  exit 1
fi

mkdir -m 700 -p "$TRACE_DIR" || exit 1

if [ ! "$STDIN_FILE" = "" ]; then
  rm -f "$STDIN_FILE" || exit 1
  touch "$STDIN_FILE" || exit 1
fi

if [ "$AFL_PATH" = "" ]; then
  SHOWMAP="${0%/afl-cmin}/afl-showmap"
else
  SHOWMAP="$AFL_PATH/afl-showmap"
fi

if [ ! -x "$SHOWMAP" ]; then
  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
  rm -rf "$TRACE_DIR"
  exit 1
fi

# The $(( )) wrapper strips the whitespace padding that BSD wc emits.
IN_COUNT=$(( $(ls -- "$IN_DIR" 2>/dev/null | wc -l) ))

if [ "$IN_COUNT" = "0" ]; then
  echo "No inputs in the target directory - nothing to be done."
  rm -rf "$TRACE_DIR"
  exit 1
fi

# NOTE: parsing ls output is safe here only because AFL queue entries never
# contain whitespace or glob characters in their names.
FIRST_FILE=$(ls "$IN_DIR" | head -1)

# Prefer hard links over copies when the filesystem allows it.
if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
  CP_TOOL=ln
else
  CP_TOOL=cp
fi

# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.

echo "[*] Testing the target binary..."

# $EXTRA_PAR is deliberately unquoted: it may hold several flags (-e -Q).
if [ "$STDIN_FILE" = "" ]; then

  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"

else

  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

fi

FIRST_COUNT=$(( $(grep -c . "$TRACE_DIR/.run_test") ))

if [ "$FIRST_COUNT" -gt "0" ]; then

  echo "[+] OK, $FIRST_COUNT tuples recorded."

else

  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
  exit 1

fi

# Let's roll!

#############################
# STEP 1: COLLECTING TRACES #
#############################

echo "[*] Obtaining traces for input files in '$IN_DIR'..."

(

  CUR=0

  if [ "$STDIN_FILE" = "" ]; then

    while read -r fn; do

      CUR=$((CUR+1))
      printf '\r    Processing file %s/%s... ' "$CUR" "$IN_COUNT"

      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"

    done < <(ls "$IN_DIR")

  else

    while read -r fn; do

      CUR=$((CUR+1))
      printf '\r    Processing file %s/%s... ' "$CUR" "$IN_COUNT"

      cp "$IN_DIR/$fn" "$STDIN_FILE"

      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

    done < <(ls "$IN_DIR")

  fi

)

echo

##########################
# STEP 2: SORTING TUPLES #
##########################

# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.

echo "[*] Sorting trace sets (this may take a while)..."

ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
  sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq"

TUPLE_COUNT=$(( $(grep -c . "$TRACE_DIR/.all_uniq") ))

echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."

#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################

# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.

echo "[*] Finding best candidates for each tuple..."

CUR=0

while read -r fn; do

  CUR=$((CUR+1))
  printf '\r    Processing file %s/%s... ' "$CUR" "$IN_COUNT"

  # Append "tuple fname" pairs; 'ls -rS' below yields files smallest-first.
  sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"

done < <(ls -rS "$IN_DIR")

echo

##############################
# STEP 4: LOADING CANDIDATES #
##############################

# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.

echo "[*] Sorting candidate list (be patient)..."

sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
  sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"

if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
  echo "[-] Error: no traces obtained from test cases, check syntax!"
  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
  exit 1
fi

# The sed command converted the sorted list to a shell script that populates
# BEST_FILE[tuple]="fname". Let's load that!

. "$TRACE_DIR/.candidate_script"

##########################
# STEP 5: WRITING OUTPUT #
##########################

# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.

echo "[*] Processing candidates and writing output files..."

CUR=0

touch "$TRACE_DIR/.already_have"

while read -r cnt tuple; do

  CUR=$((CUR+1))
  printf '\r    Processing tuple %s/%s... ' "$CUR" "$TUPLE_COUNT"

  # If we already have this tuple, skip it.

  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue

  # Bare 'tuple' in the subscript is evaluated arithmetically by bash,
  # which is what the generated .candidate_script relies on.
  FN=${BEST_FILE[tuple]}

  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"

  # Deduplicate the "already have" list only every fifth round; a plain
  # append is much cheaper and duplicates are harmless in between.
  if [ "$((CUR % 5))" = "0" ]; then
    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
  else
    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
  fi

done <"$TRACE_DIR/.all_uniq"

echo

# Wrap in $(( )) so BSD wc's space padding can't break the comparison below.
OUT_COUNT=$(( $(ls -- "$OUT_DIR" | wc -l) ))

if [ "$OUT_COUNT" = "1" ]; then
  echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi

echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo

test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"

exit 0
OLD | NEW |