Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: third_party/afl/src/afl-cmin

Issue 2075883002: Add American Fuzzy Lop (afl) to third_party/afl/ (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix nits Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/afl/src/afl-as.c ('k') | third_party/afl/src/afl-fuzz.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
#!/usr/bin/env bash
#
# american fuzzy lop - corpus minimization tool
# ---------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2014, 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
#   - Screening large corpora of input files before using them as a seed for
#     afl-fuzz. The tool will remove functionally redundant files and likely
#     leave you with a much smaller set.
#
#     (In this case, you probably also want to consider running afl-tmin on
#     the individual files later on to reduce their size.)
#
#   - Minimizing the corpus generated organically by afl-fuzz, perhaps when
#     planning to feed it to more resource-intensive tools. The tool achieves
#     this by removing all entries that used to trigger unique behaviors in the
#     past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
#

echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>"
echo

#########
# SETUP #
#########

# Process command-line options...

# Defaults: 100 MB memory limit for the child, no run time limit.
MEM_LIMIT=100
TIMEOUT=none

# Start from a clean slate so stale environment values can't leak in.
unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE

while getopts "+i:o:f:m:t:eQC" opt; do

  case "$opt" in

    "i")
      IN_DIR="$OPTARG"
      ;;

    "o")
      OUT_DIR="$OPTARG"
      ;;

    "f")
      STDIN_FILE="$OPTARG"
      ;;

    "m")
      MEM_LIMIT="$OPTARG"
      # Remember that the user picked a limit, so -Q won't override it below.
      MEM_LIMIT_GIVEN=1
      ;;

    "t")
      TIMEOUT="$OPTARG"
      ;;

    "e")
      # Edge-coverage-only mode, forwarded verbatim to afl-showmap.
      EXTRA_PAR="$EXTRA_PAR -e"
      ;;

    "C")
      # Exported so that afl-showmap keeps only crashing inputs.
      export AFL_CMIN_CRASHES_ONLY=1
      ;;

    "Q")
      # QEMU mode needs more memory by default; only bump the limit if the
      # user didn't set one explicitly via -m.
      EXTRA_PAR="$EXTRA_PAR -Q"
      test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
      QEMU_MODE=1
      ;;

    "?")
      exit 1
      ;;

  esac

done

# Drop the parsed options; what remains is the target command line.
shift $((OPTIND-1))
95
TARGET_BIN="$1"

# -i, -o, and the target binary are all mandatory; print usage and bail out
# if any is missing. Separate bracket tests joined with || are used instead
# of the deprecated, ambiguous `[ a -o b ]` form.
if [ "$TARGET_BIN" = "" ] || [ "$IN_DIR" = "" ] || [ "$OUT_DIR" = "" ]; then

  cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]

Required parameters:

  -i dir        - input directory with the starting corpus
  -o dir        - output directory for minimized files

Execution control settings:

  -f file       - location read by the fuzzed program (stdin)
  -m megs       - memory limit for child process ($MEM_LIMIT MB)
  -t msec       - run time limit for child process (none)
  -Q            - use binary-only instrumentation (QEMU mode)

Minimization settings:

  -C            - keep crashing inputs, reject everything else
  -e            - solve for edge coverage only, ignore hit counts

For additional tips, please consult docs/README.

_EOF_
  exit 1
fi
125
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script.
#
# Reject the run if any user-supplied path - or the current directory -
# lives under /tmp or /var/tmp.

for path in "$IN_DIR" "$TARGET_BIN" "$OUT_DIR" "$STDIN_FILE" "$PWD"; do

  if echo "$path" | grep -qE '^(/var)?/tmp/'; then
    echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
    exit 1
  fi

done
148
# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.

TRACE_DIR="$OUT_DIR/.traces"

if [ "$STDIN_FILE" = "" ]; then

  # The target command line uses @@ (file substitution), so afl-showmap
  # needs a scratch file to substitute; keep it inside the trace directory.
  if echo "$*" | grep -qF '@@'; then
    STDIN_FILE="$TRACE_DIR/.cur_input"
  fi

fi

# Check for obvious errors.

if [ ! "$MEM_LIMIT" = "none" ]; then

  if [ "$MEM_LIMIT" -lt "5" ]; then
    echo "[-] Error: dangerously low memory limit." 1>&2
    exit 1
  fi

fi

if [ ! "$TIMEOUT" = "none" ]; then

  if [ "$TIMEOUT" -lt "10" ]; then
    echo "[-] Error: dangerously low timeout." 1>&2
    exit 1
  fi

fi
181
# Resolve the target binary: if the given path is not an executable file,
# fall back to searching $PATH for it.

if [ ! -f "$TARGET_BIN" ] || [ ! -x "$TARGET_BIN" ]; then

  TNEW="$(which "$TARGET_BIN" 2>/dev/null)"

  if [ ! -f "$TNEW" ] || [ ! -x "$TNEW" ]; then
    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
    exit 1
  fi

  TARGET_BIN="$TNEW"

fi

# Unless explicitly skipped - or running in QEMU mode, where the binary is
# not compile-time instrumented - verify that the target looks instrumented
# by searching it for the AFL shared-memory ID marker.

if [ "$AFL_SKIP_BIN_CHECK" = "" ] && [ "$QEMU_MODE" = "" ]; then

  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
    exit 1
  fi

fi
203
if [ ! -d "$IN_DIR" ]; then
  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
  exit 1
fi

# Accept an afl-fuzz output directory directly by descending into queue/.
test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"

# Remove leftovers from a previous run, then insist that the output
# directory is empty (rmdir only succeeds on an empty directory).
find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null

rmdir "$OUT_DIR" 2>/dev/null

if [ -d "$OUT_DIR" ]; then
  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
  exit 1
fi

mkdir -m 700 -p "$TRACE_DIR" || exit 1

# Pre-create the stdin substitution file so later cp calls can't fail on a
# missing parent path.
if [ ! "$STDIN_FILE" = "" ]; then
  rm -f "$STDIN_FILE" || exit 1
  touch "$STDIN_FILE" || exit 1
fi

# Locate afl-showmap: next to this script by default, or under $AFL_PATH.
if [ "$AFL_PATH" = "" ]; then
  SHOWMAP="${0%/afl-cmin}/afl-showmap"
else
  SHOWMAP="$AFL_PATH/afl-showmap"
fi

if [ ! -x "$SHOWMAP" ]; then
  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
  rm -rf "$TRACE_DIR"
  exit 1
fi
239
IN_COUNT=$(($(ls -- "$IN_DIR" 2>/dev/null | wc -l)))

if [ "$IN_COUNT" = "0" ]; then
  echo "No inputs in the target directory - nothing to be done."
  rm -rf "$TRACE_DIR"
  exit 1
fi

FIRST_FILE=$(ls "$IN_DIR" | head -1)

# Prefer hard links over copies when moving winners to the output directory;
# probe once whether linking works across these paths (it fails across
# filesystems).
if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
  CP_TOOL=ln
else
  CP_TOOL=cp
fi

# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.

echo "[*] Testing the target binary..."

if [ "$STDIN_FILE" = "" ]; then

  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"

else

  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

fi

# One tuple per line in the trace; no lines means no instrumentation output.
FIRST_COUNT=$(($(grep -c . "$TRACE_DIR/.run_test")))

if [ "$FIRST_COUNT" -gt "0" ]; then

  echo "[+] OK, $FIRST_COUNT tuples recorded."

else

  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
  exit 1

fi
285
# Let's roll!

#############################
# STEP 1: COLLECTING TRACES #
#############################

echo "[*] Obtaining traces for input files in '$IN_DIR'..."

# Run afl-showmap once per input, storing each trace (one tuple per line)
# as $TRACE_DIR/<input name>. The loop runs in a subshell so the progress
# counter doesn't leak into the parent environment. The stdin/file-input
# branch is inside the loop (STDIN_FILE never changes mid-run), which keeps
# a single copy of the loop scaffolding.

(

  CUR=0

  while read -r fn; do

    CUR=$((CUR+1))
    printf "\\r Processing file $CUR/$IN_COUNT... "

    if [ "$STDIN_FILE" = "" ]; then

      # Target reads stdin: feed the input file directly.
      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"

    else

      # Target reads a file: copy the input into place first.
      cp "$IN_DIR/$fn" "$STDIN_FILE"

      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null

    fi

  done < <(ls "$IN_DIR")

)

echo
328
##########################
# STEP 2: SORTING TUPLES #
##########################

# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.

echo "[*] Sorting trace sets (this may take a while)..."

# Concatenate every per-input trace (file names are NUL-delimited so xargs
# can't mangle them), then produce ".all_uniq": "<count> <tuple>" lines
# sorted by ascending popularity.
ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
  sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq"

TUPLE_COUNT=$(($(grep -c . "$TRACE_DIR/.all_uniq")))

echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
346
#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################

# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.

echo "[*] Finding best candidates for each tuple..."

CUR=0

# Walk inputs in ascending size order (ls -S is largest-first, -r reverses)
# and append "<tuple> <fname>" pairs to .candidate_list; step 4's stable
# sort then keeps the first - i.e. smallest - file seen for each tuple.
while read -r fn; do

  CUR=$((CUR+1))
  printf "\\r Processing file $CUR/$IN_COUNT... "

  sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"

done < <(ls -rS "$IN_DIR")

echo
371
##############################
# STEP 4: LOADING CANDIDATES #
##############################

# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.

echo "[*] Sorting candidate list (be patient)..."

sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
  sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"

if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
  # Error goes to stderr for consistency with every other failure path.
  echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
  exit 1
fi

# The sed command converted the sorted list to a shell script that populates
# BEST_FILE[tuple]="fname". Let's load that!

. "$TRACE_DIR/.candidate_script"
396
##########################
# STEP 5: WRITING OUTPUT #
##########################

# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.

echo "[*] Processing candidates and writing output files..."

CUR=0

touch "$TRACE_DIR/.already_have"

# .all_uniq lines are "<count> <tuple>"; the count is read into cnt only to
# split it off and is otherwise unused.
while read -r cnt tuple; do

  CUR=$((CUR+1))
  printf "\\r Processing tuple $CUR/$TUPLE_COUNT... "

  # If we already have this tuple, skip it.

  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue

  FN=${BEST_FILE[tuple]}

  # Link (or copy, per the earlier probe) the winning input to the output.
  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"

  # Fold this file's tuples into the "already have" set. A full sort -u
  # dedupe is comparatively expensive, so do it only every fifth file and
  # just append (tolerating temporary duplicates) the rest of the time.
  if [ "$((CUR % 5))" = "0" ]; then
    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
  else
    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
  fi

done <"$TRACE_DIR/.all_uniq"

echo

OUT_COUNT=$(ls -- "$OUT_DIR" | wc -l)

if [ "$OUT_COUNT" = "1" ]; then
  echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi

echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo

# Keep the raw traces around only if the user asked for them.
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"

exit 0
OLDNEW
« no previous file with comments | « third_party/afl/src/afl-as.c ('k') | third_party/afl/src/afl-fuzz.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698