Linux syscall clone

时间:2022-12-17 16:57:21

clone 与 fork 类似,都可以用来创建一个新进程。与 fork 不同的是,clone 允许子进程与父进程共享部分运行时上下文资源,包括虚拟内存空间、文件描述符表(fd table)和信号处理函数表(signal handler table)。

函数原型

glibc中定义的函数原型如下:

#define _GNU_SOURCE
#include <sched.h>

int clone(int (*fn)(void *), void *child_stack,
int flags, void *arg, ...
/* pid_t *ptid, void *newtls, pid_t *ctid */ );

参数

  1. fn。子进程执行的函数指针。

  2. child_stack。子进程使用的栈指针。注意栈是向下增长的,所以 child_stack 通常应指向所分配栈空间的最高地址一端。

  3. flags。一些clone特性,具体可以查看man page。

  4. arg 及其后的可变参数。arg 用于传递用户关心的参数,会原样传给 fn;其后的可变参数(ptid、newtls、ctid)仅在 flags 设置了相应标志时才需要提供。

返回值

成功时,在调用进程(父进程)中返回子进程的 PID

失败返回-1并设置errno


用例

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
#include <limits.h>
#include <sys/mount.h>

/* Size in bytes of the buffer used as the cloned child's stack (1 MiB). */
#define STACK_SIZE (1024 * 1024)

/*
 * Flags handed to clone(): one CLONE_NEW* bit for every namespace type the
 * child should get a fresh instance of, OR-ed with SIGCHLD as the signal
 * delivered to the parent when the child terminates.
 */
#define CLONE_FLAGS                                  \
    (CLONE_NEWUTS | CLONE_NEWCGROUP | CLONE_NEWIPC | \
     CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID |   \
     CLONE_NEWNS | SIGCHLD)

/* Report msg together with the current errno text, then exit with failure. */
#define ERR_EXIT(msg)       \
    do {                    \
        perror(msg);        \
        exit(EXIT_FAILURE); \
    } while (0)

/* State shared between the parent process and the cloned child ("container"). */
struct container_t {
char stack[STACK_SIZE]; /* buffer used as the child's stack; main() passes its end address to clone() */
char *init_args[2]; /* NULL-terminated argv of the program exec'd by container_init() */
char *cname; /* name applied with sethostname() inside the child */
int syncfd[2]; /* pipe fds: the child blocks reading [0] until the parent closes [1] */
};
/*
 * Finish the container setup from inside the cloned child, then replace the
 * process image with the container's init program.
 *
 * Steps: set the hostname to container->cname, mount a fresh procfs on
 * /proc, and exec container->init_args[0] with container->init_args as argv.
 *
 * Hostname and /proc failures are reported on stderr but are not fatal, so
 * the init program is still started.  On success execv() does not return; if
 * execv() fails the process exits through ERR_EXIT.  The trailing return is
 * only there to satisfy the signature.
 */
static int container_init(struct container_t *container) {
    if (sethostname(container->cname, strlen(container->cname)) == -1) {
        perror("sethostname error.");
    }

    /*
     * Call mount(2) directly rather than spawning a shell via system().
     * The flags are deliberately 0: MS_PRIVATE asks for a propagation-type
     * change on an existing mount, it is not a flag for mounting a new
     * filesystem.
     */
    if (mount("proc", "/proc", "proc", 0, NULL) == -1) {
        perror("mount /proc error.");
    }

    /* execv() returns only on failure. */
    execv(container->init_args[0], container->init_args);
    ERR_EXIT("execv container error.");

    return 0;
}
/*
 * Entry point of the cloned child (the fn argument given to clone()).
 *
 * arg points to the struct container_t prepared by the parent.  The child
 * waits on the sync pipe until the parent closes its write end, then hands
 * over to container_init().
 *
 * Returns whatever container_init() returns.  Exits the process if the sync
 * pipe cannot be read or unexpectedly carries data.
 */
static int container_run(void *arg){
    struct container_t *container = arg;

    printf("container: before execv pid:%d\n", (int)getpid());
    /*
     * Flush now: the later execv() throws away any unflushed stdio buffer,
     * which would lose this message when stdout is not line-buffered.
     */
    fflush(stdout);

    /* Drop this process's copy of the write end, otherwise read() below
     * could never observe end-of-file. */
    close(container->syncfd[1]);

    char ch;
    ssize_t nread = read(container->syncfd[0], &ch, 1);
    if (nread == -1) {
        ERR_EXIT("contaienr:failed read syncfd");
    }
    if (nread != 0) {
        /* Not an I/O error, so errno is meaningless here; do not use perror. */
        fprintf(stderr, "container: unexpected data on syncfd\n");
        exit(EXIT_FAILURE);
    }
    close(container->syncfd[0]);

    return container_init(container);
}

/*
 * Write the NUL-terminated string map to the existing file at path.
 *
 * main() uses this, via update_ugid_map(), to fill /proc/<pid>/uid_map and
 * /proc/<pid>/gid_map.  The string is written with a single write() call and
 * a short write is treated as a failure.
 *
 * Any failure to open, write or close the file terminates the process
 * through ERR_EXIT.
 */
void update_map(char *map,char *path) {
    size_t len = strlen(map);

    int fd = open(path, O_RDWR);
    if (fd == -1) {
        ERR_EXIT("open mapping file error.");
    }

    ssize_t written = write(fd, map, len);
    if (written == -1 || (size_t)written != len) {
        ERR_EXIT("write mappiing file error.");
    }

    /* The descriptor used to be leaked on every call. */
    if (close(fd) == -1) {
        ERR_EXIT("close mapping file error.");
    }
}

/*
 * Write "deny" to /proc/<cpid>/setgroups.
 *
 * Since Linux 3.19 a process lacking CAP_SETGID in the parent user namespace
 * may only write gid_map after setgroups(2) has been disabled this way (see
 * user_namespaces(7)).  If the file cannot be opened (it does not exist on
 * older kernels) nothing is done; a genuine problem will surface when
 * gid_map is written afterwards.
 */
static void deny_setgroups(pid_t cpid) {
    static const char deny[] = "deny";
    char path[PATH_MAX];

    snprintf(path, sizeof path, "/proc/%ld/setgroups", (long)cpid);

    int fd = open(path, O_WRONLY);
    if (fd == -1) {
        return;
    }
    if (write(fd, deny, sizeof deny - 1) == -1) {
        perror("write setgroups file error.");
    }
    close(fd);
}

/*
 * Install the UID and GID mappings of the child's user namespace.
 *
 * cpid is the child's PID as seen by the caller.  ID 0 inside the namespace
 * is mapped to the caller's real UID / GID outside it, with a range of one
 * ID each.  Failures while writing either map terminate the process inside
 * update_map().
 */
void update_ugid_map(pid_t cpid){
    /* Fixed-size array (the previous `const int` size made this a VLA). */
    char map_buf[100];
    char map_path[PATH_MAX];

    snprintf(map_buf, sizeof map_buf, "0 %ld 1", (long)getuid());
    snprintf(map_path, sizeof map_path, "/proc/%ld/uid_map", (long)cpid);
    update_map(map_buf, map_path);

    /*
     * An unprivileged caller must opt out of setgroups(2) before gid_map
     * becomes writable.  Root is left alone so that its behaviour is
     * unchanged.
     */
    if (geteuid() != 0) {
        deny_setgroups(cpid);
    }

    snprintf(map_buf, sizeof map_buf, "0 %ld 1", (long)getgid());
    snprintf(map_path, sizeof map_path, "/proc/%ld/gid_map", (long)cpid);
    update_map(map_buf, map_path);
}
/*
 * Start /bin/bash in a cloned child that lives in the namespaces selected by
 * CLONE_FLAGS, with hostname "container_001".
 *
 * Sequence: create the sync pipe, clone the child, write the child's UID/GID
 * maps, release the child by closing the pipe's write end, then wait for the
 * child to terminate.
 *
 * Returns 0 once the child has been reaped; any setup failure exits the
 * process through ERR_EXIT.
 */
int main(void){
    struct container_t container = {
        .cname = "container_001",
        .init_args = { "/bin/bash", NULL },
    };

    if (pipe(container.syncfd) == -1) {
        ERR_EXIT("create syncfd error.");
    }

    /* The stack grows downwards, so clone() gets the end of the buffer. */
    pid_t cpid = clone(container_run, container.stack + STACK_SIZE,
                       CLONE_FLAGS, &container);
    if (cpid == -1) {
        ERR_EXIT("clone child error.");
    }
    printf("main: cloned container pid:%d\n", (int)cpid);

    /* The parent never reads from the sync pipe. */
    close(container.syncfd[0]);

    /* The child is still blocked on the pipe while its ID maps are written. */
    update_ugid_map(cpid);

    /* Closing the write end makes the child's read() return end-of-file,
     * which is its signal to continue. */
    close(container.syncfd[1]);

    if (waitpid(cpid, NULL, 0) == -1) {
        ERR_EXIT("waitpid error.");
    }
    return 0;
}