AFL源码分析(二)

本文最后更新于:2023年10月6日 晚上

AFL源码分析(二)

afl-as.c

main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/* Entry point of afl-as. Builds the assembler command line with edit_params(),
   writes an instrumented copy of the input with add_instrumentation(), then
   forks and executes as_params[0] on that copy and exits with the child's
   exit status. */
int main(int argc, char** argv) {

s32 pid;
u32 rand_seed;
int status;
u8* inst_ratio_str = getenv("AFL_INST_RATIO"); // raw AFL_INST_RATIO string, or NULL if unset
// It is parsed below as a percentage (0 to 100) that gates instrumentation of each candidate location

struct timeval tv;
struct timezone tz;

clang_mode = !!getenv(CLANG_ENV_VAR); // 1 if the CLANG_ENV_VAR environment variable is set, otherwise 0

if (isatty(2) && !getenv("AFL_QUIET")) { // banner only when fd 2 (stderr) is a terminal and AFL_QUIET is unset

SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

} else be_quiet = 1;

if (argc < 2) { // no arguments at all: print the usage text and exit with status 1

SAYF("\n"
"This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
"executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
"don't want to run this program directly.\n\n"

"Rarely, when dealing with extremely complex projects, it may be advisable to\n"
"set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
"instrumenting every discovered branch.\n\n");

exit(1);

}

gettimeofday(&tv, &tz); // current time goes into tv, timezone information into tz

rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); // seed mixes seconds, microseconds and the process id

srandom(rand_seed);

edit_params(argc, argv); // build as_params, the argument vector handed to execvp() below

if (inst_ratio_str) { // AFL_INST_RATIO is set: parse it into inst_ratio, rejecting unparsable values and values above 100

if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

}

if (getenv(AS_LOOP_ENV_VAR)) // already set: the marker written just below is visible, so afl-as is invoking itself
FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

setenv(AS_LOOP_ENV_VAR, "1", 1); // set AS_LOOP_ENV_VAR=1 (overwriting) so a nested afl-as trips the check above

/* When compiling with ASAN, we don't have a particularly elegant way to skip
ASAN-specific branches. But we can probabilistically compensate for
that... */

if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) { // ASAN or MSAN requested: cut inst_ratio to one third (integer division)
sanitizer = 1;
inst_ratio /= 3;
}

if (!just_version) add_instrumentation(); // skipped when edit_params() detected a plain --version request

if (!(pid = fork())) { // child process: replace the process image with the program named by as_params[0]

execvp(as_params[0], (char**)as_params);
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

}

if (pid < 0) PFATAL("fork() failed");

if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); // parent: block until the child terminates

if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file); // delete modified_file unless AFL_KEEP_ASSEMBLY is set

exit(WEXITSTATUS(status)); // propagate the child's exit status

}

为了方便查看as_params的值,我们追加如下代码:

1
2
3
4
// NOTE(review): as_params is a pointer (edit_params() assigns it from ck_alloc()),
// so sizeof(as_params) is the size of the pointer -- 8 on a 64-bit build -- and not
// the number of arguments. The loop therefore always visits exactly 8 slots; the
// slots past the real arguments hold NULL (presumably zero-filled by ck_alloc --
// confirm) and are printed as "(null)". Passing NULL to %s is not portable; looping
// while as_params[i] is non-NULL would print only the real arguments.
for (int i = 0; i < sizeof(as_params); i++) 
{
printf("as_params[%d]:%s\n", i, as_params[i]);
}

当我们执行afl-as -o myafl.o myafl.s时,输出如下:

1
2
3
4
5
6
7
8
as_params[0]:as
as_params[1]:-o
as_params[2]:/home/p2lst/Documents/fuzz/test1/myafl.o
as_params[3]:/home/p2lst/Documents/fuzz/test1/.afl-66163-1696406546.s
as_params[4]:(null)
as_params[5]:(null)
as_params[6]:(null)
as_params[7]:(null)

可以看到,最终执行的是as。

由此可见,afl-as也是对as进行的再封装。

edit_params

解析并修改传递给as的参数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* Examine and modify parameters to pass to 'as'. Note that the file name
is always the last parameter passed by GCC, so we exploit this property
to keep the code simple.

On return, as_params holds the command line to execute (assembler name,
forwarded options, then modified_file, then NULL), input_file names the
file to read (NULL means stdin), and just_version / pass_thru are set
according to the last argument. */

static void edit_params(int argc, char** argv) {

u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); // optional overrides read from the environment
u32 i;

#ifdef __APPLE__ // macOS-only section

u8 use_clang_as = 0;

/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.

To work around this, when using clang and running without AFL_AS
specified, we will actually call 'clang -c' instead of 'as -q' to
compile the assembly file.

The tools aren't cmdline-compatible, but at least for now, we can
seemingly get away with this by making only very minor tweaks. Thanks
to Nico Weber for the idea. */

if (clang_mode && !afl_as) {

use_clang_as = 1;

afl_as = getenv("AFL_CC");
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";

}

#endif /* __APPLE__ */

/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */

if (!tmp_dir) tmp_dir = getenv("TEMP"); // first fallback: TEMP
if (!tmp_dir) tmp_dir = getenv("TMP"); // second fallback: TMP
if (!tmp_dir) tmp_dir = "/tmp"; // none of the variables is set: default to "/tmp"

as_params = ck_alloc((argc + 32) * sizeof(u8*)); // one slot per argv entry plus 32 spare slots

as_params[0] = afl_as ? afl_as : (u8*)"as"; // program to run: afl_as when it is set, otherwise plain "as"

as_params[argc] = 0; // NULL terminator at index argc

for (i = 1; i < argc - 1; i++) { // every argument except argv[0] and the last one (handled separately below)

if (!strcmp(argv[i], "--64")) use_64bit = 1; // "--64" selects 64-bit instrumentation
else if (!strcmp(argv[i], "--32")) use_64bit = 0; // "--32" selects 32-bit instrumentation

#ifdef __APPLE__ // macOS-only section

/* The Apple case is a bit different... */

if (!strcmp(argv[i], "-arch") && i + 1 < argc) {

if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
else if (!strcmp(argv[i + 1], "i386"))
FATAL("Sorry, 32-bit Apple platforms are not supported.");

}

/* Strip options that set the preference for a particular upstream
assembler in Xcode. */

if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;

#endif /* __APPLE__ */

as_params[as_par_cnt++] = argv[i]; // forward the option unchanged (as_par_cnt is defined outside this function; presumably starts at 1 -- confirm)

}

#ifdef __APPLE__ // macOS-only section

/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */

if (use_clang_as) { // clang was chosen as the assembler above: append the three extra arguments

as_params[as_par_cnt++] = "-c";
as_params[as_par_cnt++] = "-x";
as_params[as_par_cnt++] = "assembler";

}

#endif /* __APPLE__ */

input_file = argv[argc - 1]; // the last argument is treated as the input file name

if (input_file[0] == '-') { // the last argument starts with '-', so it is not a file name

if (!strcmp(input_file + 1, "-version")) { // exactly "--version": record it and pass the option itself through as the last argument
just_version = 1;
modified_file = input_file;
goto wrap_things_up;
}

if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)"); // anything other than a bare "-" is rejected
else input_file = NULL; // bare "-": add_instrumentation() reads stdin when input_file is NULL

} else {

/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */

// pass_thru is set to 1 unless input_file begins with tmp_dir, "/var/tmp/" or "/tmp/";
// add_instrumentation() copies lines without instrumenting them when pass_thru is set
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}

modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL)); // output path: <tmp_dir>/.afl-<pid>-<current time>.s

wrap_things_up:

as_params[as_par_cnt++] = modified_file; // modified_file becomes the final argument
as_params[as_par_cnt] = NULL; // terminate the argument vector

}

add_instrumentation

处理输入文件,对汇编文件基本块进行插桩。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
/* Process input file, generate modified_file. Insert instrumentation in all
the appropriate places.

Reads input_file (or stdin when it is NULL) line by line, copies every line
to modified_file, and writes a trampoline after conditional jumps and before
the first instruction following selected labels. If at least one trampoline
was written, the main payload is appended at the end. */
static void add_instrumentation(void) {

static u8 line[MAX_LINE];

FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;

u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

u8* colon_pos;

#endif /* __APPLE__ */

if (input_file) { // a file name was given: open it for reading

inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);

} else inf = stdin;

outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600); // create modified_file with mode 0600; O_EXCL makes this fail if it already exists

if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

outf = fdopen(outfd, "w"); // wrap the descriptor in a stdio stream for writing

if (!outf) PFATAL("fdopen() failed");

while (fgets(line, MAX_LINE, inf)) { // read one line (or a MAX_LINE-bounded piece of one) per iteration

/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a character, dump
the trampoline now. */

/* Deferred trampoline: emitted only when pass_thru, skip_intel, skip_app and
skip_csect are all clear, instr_ok and instrument_next are both set, and
the line is a tab followed by a letter. */
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE)); // write the 64- or 32-bit trampoline; R(MAP_SIZE) fills its numeric placeholder

instrument_next = 0;
ins_lines++; // one more instrumented location

}

/* Output the actual line, call it a day in pass-thru mode. */

fputs(line, outf); // copy the original line to the output unchanged

if (pass_thru) continue; // pass-through mode: nothing else is examined

/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */

if (line[0] == '\t' && line[1] == '.') {

/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
// Outside clang mode and while instr_ok is set, "p2align " followed by a single digit and a newline sets skip_next_label
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;

// A directive that starts with "text\n", "section\t.text", "section\t__TEXT,__text" or
// "section __TEXT,__text" turns instr_ok on; move on to the next line
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}
// Any other "section" directive, "bss\n" or "data\n" turns instr_ok off; move on to the next line
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}

}

/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */

if (strstr(line, ".code")) { // a .code directive: skip while the declared width differs from use_64bit

if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;

}

/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
// skip_intel is set by .intel_syntax and cleared by .att_syntax
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;

/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
// skip_app is set by #APP and cleared by #NO_APP (a #NO_APP line matches both tests; the second one wins)
if (line[0] == '#' || line[1] == '#') {

if (strstr(line, "#APP")) skip_app = 1;
if (strstr(line, "#NO_APP")) skip_app = 0;

}

// Core of the instrumentation logic
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:

^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches

...but not:

^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps

Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.

*/

if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue; // not an instrumentable context, or a comment / space-indented line: next line

/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). */

// A tab-indented "j" mnemonic whose next letter is not 'm' gets a trampoline right after it,
// subject to the inst_ratio check; ins_lines is incremented
if (line[0] == '\t') {

if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));

ins_lines++;

}

continue;

}

/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. */

// Decide whether this label is one we instrument

#ifdef __APPLE__

/* Apple: L<whatever><digit>: */

if ((colon_pos = strstr(line, ":"))) {

if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

/* Everybody else: .L<whatever>: */

if (strstr(line, ":")) { // the line contains ':', so it is treated as a label

if (line[0] == '.') { // leading '.': apply the local-label checks below; otherwise it is handled as a function label

#endif /* __APPLE__ */

/* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__ // macOS label convention

/* Apple: L<num> / LBB<num> */
// L<digit>, or LBB in clang mode, subject to the inst_ratio check
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {

#else

/* Everybody else: .L<num> / .LBB<num> */

// .L<digit>, or .LBB in clang mode, subject to the inst_ratio check
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {

#endif /* __APPLE__ */

/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.

We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */

if (!skip_next_label) instrument_next = 1; else skip_next_label = 0; // a pending skip_next_label consumes this label instead of instrumenting it

}

} else {

/* Function label (always instrumented, deferred mode). */

instrument_next = 1;

}

}

}

if (ins_lines) // at least one trampoline was written: append the 64- or 32-bit main payload once
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

if (input_file) fclose(inf); // stdin is left open
fclose(outf); // close the output stream

if (!be_quiet) { // unless quiet, report how many locations were instrumented

if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);

}

}

插桩代码分析

插桩过程中,我们插入了 trampoline_fmt_64 和 main_payload_64。

接下来,对插入的内容进行分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/* 64-bit trampoline template, in AT&T syntax. add_instrumentation() passes it
   to fprintf() as the format string with R(MAP_SIZE) as the only argument, so
   every "%%" below is written out as a single '%' and "%08x" is replaced by
   that argument as 8 hex digits. */
static const u8* trampoline_fmt_64 =

"\n"
"/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
/* Move rsp down by 128+24 bytes; the 24 bytes hold the three saved registers
   (the 128 presumably steps over the x86-64 red zone -- confirm). */
"leaq -(128+24)(%%rsp), %%rsp\n"
/* Save rdx, rcx and rax at offsets 0, 8 and 16. */
"movq %%rdx, 0(%%rsp)\n"
"movq %%rcx, 8(%%rsp)\n"
"movq %%rax, 16(%%rsp)\n"
/* Load this location's ID into rcx and call the logging routine. */
"movq $0x%08x, %%rcx\n"
"call __afl_maybe_log\n"
/* Restore the three registers, then put rsp back. */
"movq 16(%%rsp), %%rax\n"
"movq 8(%%rsp), %%rcx\n"
"movq 0(%%rsp), %%rdx\n"
"leaq (128+24)(%%rsp), %%rsp\n"
"\n"
"/* --- END --- */\n"
"\n";

这段汇编为AT&T格式;用IDA反汇编插桩后的二进制程序,可得到对应的intel格式汇编。

1
2
3
4
5
6
7
8
9
10
lea     rsp, [rsp-98h]
mov [rsp+0C0h+var_C0], rdx
mov [rsp+0C0h+var_B8], rcx
mov [rsp+0C0h+var_B0], rax
mov rcx, 0DE9Eh
call __afl_maybe_log
mov rax, [rsp+0C0h+var_B0]
mov rcx, [rsp+0C0h+var_B8]
mov rdx, [rsp+0C0h+var_C0]
lea rsp, [rsp+98h]

主要含义为:

  1. 将rsp下移0x98(即128+24)字节,预留栈空间
  2. 保存rdx,rcx,rax的值
  3. 将rcx的值设置为R(MAP_SIZE)生成的数值,作为该桩的标识
  4. 调用__afl_maybe_log
  5. 恢复rdx,rcx,rax的值,并将rsp恢复到原来的位置

重点调用__afl_maybe_log函数,对该函数进行分析。

笔者自知实力不足,以下分析主要引用深信服千里目安全实验室

image-20231004174003896

首先,对上述引用的变量进行解释。(参考main_payload_64)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#ifdef __APPLE__

" .comm __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
" .comm __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
" .comm __afl_fork_pid, 4\n"
" .comm __afl_temp, 4\n"
" .comm __afl_setup_failure, 1\n"

#else

" .lcomm __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
" .lcomm __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
" .lcomm __afl_fork_pid, 4\n"
" .lcomm __afl_temp, 4\n"
" .lcomm __afl_setup_failure, 1\n"

#endif /* ^__APPLE__ */

" .comm __afl_global_area_ptr, 8, 8\n"
"\n"

定义了以下变量:

__afl_area_ptr: 共享内存地址

__afl_prev_loc: 上一个插桩位置

__afl_fork_pid: fork server产生的子进程的pid

__afl_temp: 临时变量

__afl_setup_failure: 失败标识位

__afl_global_area_ptr: 全局指针

这里主要借IDA中intel格式汇编进行分析。

__afl_maybe_log

1
2
3
4
5
lahf
seto al
mov rdx, cs:__afl_area_ptr
test rdx, rdx
jz short __afl_setup

首先,使用 lahf 指令(加载状态标志位到AH)将EFLAGS寄存器的低八位复制到 AH,被复制的标志位包括:符号标志位(SF)、零标志位(ZF)、辅助进位标志位(AF)、奇偶标志位(PF)和进位标志位(CF),使用该指令可以方便地将标志位副本保存在变量中;

然后,使用 seto al 指令:若溢出标志位(OF)为1,则将 AL 置为1,否则置为0,从而把 lahf 没有保存的溢出标志位也保存下来;

最后,判断__afl_area_ptr是否为空。若为空跳转到__afl_setup,否则继续执行。

__afl_setup

1
2
3
4
5
6
7
8
9
10
11
__afl_setup:
cmp cs:__afl_setup_failure, 0
jnz short __afl_return

mov rdx, offset __afl_global_area_ptr
mov rdx, [rdx]
test rdx, rdx
jz short __afl_setup_first

mov cs:__afl_area_ptr, rdx
jmp short __afl_store

首先,判断__afl_setup_failure是否为0,即判断之前的初始化是否失败过。若不为0(已出错),则跳转到__afl_return直接返回;否则继续向下执行。

其次,判断__afl_global_area_ptr是否为空。若为空,则跳转到__afl_setup_first;否则将__afl_global_area_ptr的值赋给__afl_area_ptr,并跳转到__afl_store。

__afl_setup_first

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
__afl_setup_first:
lea rsp, [rsp-160h]
mov [rsp+160h+var_160], rax
mov [rsp+160h+var_158], rcx
mov [rsp+160h+var_150], rdi
mov [rsp+160h+var_140], rsi
mov [rsp+160h+var_138], r8
mov [rsp+160h+var_130], r9
mov [rsp+160h+var_128], r10
mov [rsp+160h+var_120], r11
movq [rsp+160h+var_100], xmm0
movq [rsp+160h+var_F0], xmm1
movq [rsp+160h+var_E0], xmm2
movq [rsp+160h+var_D0], xmm3
movq [rsp+160h+var_C0], xmm4
movq [rsp+160h+var_B0], xmm5
movq [rsp+160h+var_A0], xmm6
movq [rsp+160h+var_90], xmm7
movq [rsp+160h+var_80], xmm8
movq [rsp+160h+var_70], xmm9
movq [rsp+160h+var_60], xmm10
movq [rsp+160h+var_50], xmm11
movq [rsp+160h+var_40], xmm12
movq [rsp+160h+var_30], xmm13
movq [rsp+160h+var_20], xmm14
movq [rsp+160h+var_10], xmm15
push r12
mov r12, rsp
sub rsp, 10h
and rsp, 0FFFFFFFFFFFFFFF0h
lea rdi, _AFL_SHM_ENV ; "__AFL_SHM_ID"
call getenv ; PIC mode
test rax, rax
jz __afl_setup_abort

mov rdi, rax ; nptr
call atoi ; PIC mode
xor rdx, rdx ; shmflg
xor rsi, rsi ; shmaddr
mov rdi, rax ; shmid
call shmat ; PIC mode
cmp rax, 0FFFFFFFFFFFFFFFFh
jz __afl_setup_abort

mov rdx, rax
mov cs:__afl_area_ptr, rax
mov rdx, offset __afl_global_area_ptr
mov [rdx], rax
mov rdx, rax

首先,保存寄存器的值,将rsp进行16字节对齐

然后,调用getenv获取共享内存的id(共享内存id在afl-fuzz中设置)。若获取成功,则继续向下执行;若获取失败,则跳转到__afl_setup_abort

其次,调用atoi将共享内存id字符串转换为int型,再调用shmat将该共享内存映射(attach)到当前进程。若成功,则继续向下执行;若shmat返回-1,则跳转到__afl_setup_abort

最后,把共享内存地址赋值给__afl_area_ptr和__afl_global_area_ptr

__afl_forkserver

1
2
3
4
5
6
7
8
9
10
__afl_forkserver:
push rdx
push rdx
mov rdx, 4 ; n
lea rsi, __afl_temp ; buf
mov rdi, 0C7h ; fd
call write ; PIC mode
cmp rax, 4
jnz __afl_fork_resume

向FORKSRV_FD + 1(0C7h,状态管道)写入__afl_temp中的4个字节,告诉父进程fork server启动成功;若写入的字节数不是4,则跳转到__afl_fork_resume。

__afl_fork_wait_loop

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
__afl_fork_wait_loop:   ; nbytes
mov rdx, 4
lea rsi, __afl_temp ; buf
mov rdi, 0C6h ; status
call read ; PIC mode
cmp rax, 4
jnz __afl_die

call fork ; PIC mode
cmp rax, 0
jl __afl_die
jz short __afl_fork_resume

mov cs:__afl_fork_pid, eax
mov rdx, 4 ; n
lea rsi, __afl_fork_pid ; buf
mov rdi, 0C7h ; fd
call write ; PIC mode
mov rdx, 0 ; options
lea rsi, __afl_temp ; stat_loc
mov rdi, qword ptr cs:__afl_fork_pid ; pid
call waitpid ; PIC mode
cmp rax, 0
jle __afl_die

mov rdx, 4 ; n
lea rsi, __afl_temp ; buf
mov rdi, 0C7h ; fd
call write ; PIC mode
jmp __afl_fork_wait_loop
  1. 从FORKSRV_FD(0C6h,即控制管道)中读取4字节指令到__afl_temp。若读取成功,继续执行;若读取失败,跳转到__afl_die
  2. 调用fork生成子进程:若fork失败,跳转到__afl_die;子进程跳转到__afl_fork_resume
  3. 父进程调用write将子进程的pid写入到FORKSRV_FD + 1中,并调用waitpid等待子进程结束(失败则跳转到__afl_die),再将保存在__afl_temp中的子进程状态信息写入到FORKSRV_FD + 1,然后跳回__afl_fork_wait_loop等待下一条指令

__afl_fork_resume

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
__afl_fork_resume:      ; fd
mov rdi, 0C6h
call close ; PIC mode
mov rdi, 0C7h ; fd
call close ; PIC mode
pop rdx
pop rdx
mov rsp, r12
pop r12
mov rax, [rsp+160h+var_160]
mov rcx, [rsp+160h+var_158]
mov rdi, [rsp+160h+var_150]
mov rsi, [rsp+160h+var_140]
mov r8, [rsp+160h+var_138]
mov r9, [rsp+160h+var_130]
mov r10, [rsp+160h+var_128]
mov r11, [rsp+160h+var_120]
movq xmm0, [rsp+160h+var_100]
movq xmm1, [rsp+160h+var_F0]
movq xmm2, [rsp+160h+var_E0]
movq xmm3, [rsp+160h+var_D0]
movq xmm4, [rsp+160h+var_C0]
movq xmm5, [rsp+160h+var_B0]
movq xmm6, [rsp+160h+var_A0]
movq xmm7, [rsp+160h+var_90]
movq xmm8, [rsp+160h+var_80]
movq xmm9, [rsp+160h+var_70]
movq xmm10, [rsp+160h+var_60]
movq xmm11, [rsp+160h+var_50]
movq xmm12, [rsp+160h+var_40]
movq xmm13, [rsp+160h+var_30]
movq xmm14, [rsp+160h+var_20]
movq xmm15, [rsp+160h+var_10]
lea rsp, [rsp+160h]
jmp __afl_store

由于子进程继承了父进程的文件描述符,子进程先关闭FORKSRV_FD和FORKSRV_FD + 1,再恢复寄存器的值,最后跳转到__afl_store

__afl_store

1
2
3
4
5
__afl_store:
xor rcx, cs:__afl_prev_loc
xor cs:__afl_prev_loc, rcx
shr cs:__afl_prev_loc, 1
inc byte ptr [rdx+rcx]

这里直接查看IDA反编译得到的伪代码:

1
2
3
4
v7 = _afl_prev_loc ^ a4;
_afl_prev_loc ^= v7;
_afl_prev_loc = (unsigned __int64)_afl_prev_loc >> 1;
++*(_BYTE *)(v6 + v7);

其中a4为插入的随机值,也即rcx;而 _afl_prev_loc 是上一个桩的随机id右移一位后的值。

第一次异或得到 v7 = 当前id ^ _afl_prev_loc,作为共享内存中的下标;第二次异或把 _afl_prev_loc 还原成当前id,再右移一位作为新的 _afl_prev_loc。右移一位可以区分方向不同的路径(A -> B 与 B -> A),也避免 A -> A、B -> B 这类路径的下标都为0而互相混淆。最后将共享内存中下标 v7 处的字节计数加一。

参考链接

https://eternalsakura13.com/2020/08/23/afl/

https://hollk.blog.csdn.net/category_11470526.html

https://paper.seebug.org/1732/

https://www.z1r0.top/2023/03/23/AFL-fuzz%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90/


AFL源码分析(二)
http://example.com/2023/10/04/AFL源码分析二/
作者
l1s00t
发布于
2023年10月4日
更新于
2023年10月6日
许可协议