首页 > 代码库 > FUSE简介



传统的文件系统是操作系统的一部分,放在操作系统内核里面实现。Fuse(Filesystem in Userspace)是一个用户空间文件系统框架,提供给我们一组用于实现一个文件系统的API,使我们可以在用户态实现自己的文件系统。




3)可以把非文件的服务当做文件来实现,比如把gmail提供的巨大的空间用来进行文件存储的Gmail Filesystem




1)用户空间库给程序员提供编程接口,程序员通过实现fuse提供的两组接口fuse_lowlevel_ops, fuse_operations之一即可实现一个用户空间文件系统






struct fuse_lowlevel_ops的成员如下所示,其中init方法在其它所有方法之前调用,用于初始化文件系统,fuse已经实现,destroy则是在文件系统被卸载时做一些清理工作。用于大多数请求的参数都是fuse_ino_t类型的ino,而文件系统提供给用户的视图是以文件名呈现的,故lookup是实现文件系统的关键,它在parent中查找名字name对应的文件,并返回相应的信息,可使用fuse_reply_entry或fuse_reply_err作为请求的返回。


void(* init )(void *userdata, struct fuse_conn_info *conn)void(* destroy )(void *userdata)void(* lookup )(fuse_req_t req, fuse_ino_t parent, const char *name)void(* forget )(fuse_req_t req, fuse_ino_t ino, unsigned long nlookup)void(* getattr )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* setattr )(fuse_req_t req, fuse_ino_t ino, struct stat *attr, int to_set, struct fuse_file_info *fi)void(* readlink )(fuse_req_t req, fuse_ino_t ino)void(* mknod )(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, dev_t rdev)void(* mkdir )(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode)void(* unlink )(fuse_req_t req, fuse_ino_t parent, const char *name)void(* rmdir )(fuse_req_t req, fuse_ino_t parent, const char *name)void(* symlink )(fuse_req_t req, const char *link, fuse_ino_t parent, const char *name)void(* rename )(fuse_req_t req, fuse_ino_t parent, const char *name, fuse_ino_t newparent, const char *newname)void(* link )(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, const char *newname)void(* open )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* read )(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi)void(* write )(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, off_t off, structfuse_file_info *fi)void(* flush )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* release )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* fsync )(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi)void(* opendir )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* readdir )(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi)void(* releasedir )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)void(* fsyncdir )(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi)void(* statfs )(fuse_req_t req, fuse_ino_t ino)void(* setxattr )(fuse_req_t req, 
fuse_ino_t ino, const char *name, const char *value, size_t size, int flags)void(* getxattr )(fuse_req_t req, fuse_ino_t ino, const char *name, size_t size)void(* listxattr )(fuse_req_t req, fuse_ino_t ino, size_t size)void(* removexattr )(fuse_req_t req, fuse_ino_t ino, const char *name)void(* access )(fuse_req_t req, fuse_ino_t ino, int mask)void(* create )(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info*fi)void(* getlk )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct flock *lock)void(* setlk )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct flock *lock, int sleep)void(* bmap )(fuse_req_t req, fuse_ino_t ino, size_t blocksize, uint64_t idx)void(* ioctl )(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg, struct fuse_file_info *fi, unsigned *flagsp, const void *in_buf, size_t in_bufsz, size_t out_bufszp)void(* poll )(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct fuse_pollhandle *ph)



static struct {    void (*func)(fuse_req_t, fuse_ino_t, const void *);    const char *name;} fuse_ll_ops[] = {    //只列举了部分    [FUSE_LOOKUP]      = { do_lookup,      "LOOKUP"      },    [FUSE_OPEN]        = { do_open,        "OPEN"        },    [FUSE_READ]        = { do_read,        "READ"        },    [FUSE_WRITE]       = { do_write,       "WRITE"       },    [FUSE_STATFS]      = { do_statfs,      "STATFS"      },    [FUSE_FLUSH]       = { do_flush,       "FLUSH"       },    [FUSE_INIT]        = { do_init,        "INIT"        },    [FUSE_OPENDIR]     = { do_opendir,     "OPENDIR"     },    [FUSE_READDIR]     = { do_readdir,     "READDIR"     },    [FUSE_RELEASEDIR]  = { do_releasedir,  "RELEASEDIR"  },    [FUSE_DESTROY]     = { do_destroy,     "DESTROY"     }};


/*
 * Handle a read request: unpack the fuse_read_in argument and forward it
 * to the read callback registered in req->f->op.
 *
 * req    - the request being served
 * nodeid - inode number the request refers to
 * inarg  - request payload, interpreted as a struct fuse_read_in
 */
static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
{
    /* inarg is only read, so keep the const qualifier. */
    const struct fuse_read_in *arg = inarg;

    /*
     * If the user implemented read, call the user-space read; otherwise
     * answer with a "not implemented" error.  `op` is the operation table
     * the user filled in when implementing the filesystem and handed to fuse.
     */
    if (req->f->op.read) {
        struct fuse_file_info fi;

        memset(&fi, 0, sizeof(fi));
        fi.fh = arg->fh;
        fi.fh_old = fi.fh;
        req->f->op.read(req, nodeid, arg->size, arg->offset, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}




/*
 * Path-based operation table.  Every callback returns int, and every
 * callback takes a path (const char *) as its first argument.
 */
struct fuse_operations {
    int (*getattr)(const char *, struct stat *);
    int (*readlink)(const char *, char *, size_t);
    int (*getdir)(const char *, fuse_dirh_t, fuse_dirfil_t);
    int (*mknod)(const char *, mode_t, dev_t);
    int (*mkdir)(const char *, mode_t);
    int (*unlink)(const char *);
    int (*rmdir)(const char *);
    int (*symlink)(const char *, const char *);
    int (*rename)(const char *, const char *);
    int (*link)(const char *, const char *);
    int (*chmod)(const char *, mode_t);
    int (*chown)(const char *, uid_t, gid_t);
    int (*truncate)(const char *, off_t);
    int (*utime)(const char *, struct utimbuf *);

    int (*open)(const char *, struct fuse_file_info *);
    int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *);
    int (*write)(const char *, const char *, size_t, off_t, struct fuse_file_info *);
    int (*statfs)(const char *, struct statfs *);
    int (*flush)(const char *, struct fuse_file_info *);
    int (*release)(const char *, struct fuse_file_info *);
    int (*fsync)(const char *, int, struct fuse_file_info *);

    int (*setxattr)(const char *, const char *, const char *, size_t, int);
    int (*getxattr)(const char *, const char *, char *, size_t);
    int (*listxattr)(const char *, char *, size_t);
    int (*removexattr)(const char *, const char *);
};




static struct fuse_lowlevel_ops fuse_path_ops = {    //只列举了部分方法    .init = fuse_lib_init,    .destroy = fuse_lib_destroy,    .lookup = fuse_lib_lookup,    .forget = fuse_lib_forget,    .getattr = fuse_lib_getattr,    .setattr = fuse_lib_setattr,.access = fuse_lib_access,.read = fuse_lib_read,    .readlink = fuse_lib_readlink};



int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size,                 off_t off, struct fuse_file_info *fi){fuse_get_context()->private_data = http://www.mamicode.com/fs->user_data;"READ[%llu] %lu bytes from %llun",                    (unsigned long long) fi->fh, (unsigned long) size,                    (unsigned long long) off);         fuse_prepare_interrupt(f, req, &d);        res = fuse_fs_read(f->fs, path, buf, size, off, fi); //通过这个方法调用用户实现的方法        fuse_finish_interrupt(f, req, &d);        free(path);    }    pthread_rwlock_unlock(&f->tree_lock);     if (res >= 0) {        if (f->conf.debug)            fprintf(stderr, "   READ[%llu] %u bytesn",                    (unsigned long long)fi->fh, res);        if ((size_t) res > size)            fprintf(stderr, "fuse: read too many bytes");        fuse_reply_buf(req, buf, res); //返回结果    } else        reply_err(req, res);     free(buf);


 FUSE 流程



 fuse_main() (lib/helper.c)——fuse用户空间主函数,用户程序调用它时,fuse_main()函数解析相关参数(如mountpoint、multithreaded),并调用fuse_mount()函数,接着调用fuse_new()函数,为fuse文件系统数据分配存储空间。最后调用fuse_loop()函数实现会话的接受与处理。


fuse_mount() (lib/mount.c)——创建UNIX本地套接口,创建并运行子进程fusermount


fusermount (util/fusermount.c)——确保fuse模块已经加载,通过UNIX套接口将fuse模块的文件fd返回给fuse_mount()函数。


fuse_new() (lib/fuse.c)——为fuse创建数据结构空间,用来存储文件系统数据。


fuse_loop() (lib/fuse.c)( fuse_loop_mt() (lib/fuse_mt.c))——从/dev/fuse (/dev 为设备文件存储目录)读取文件系统调用,调用fuse_operations或fuse_lowlevel_ops结构中的处理函数,并将调用结果返回给/dev/fuse。

FUSE Kernel模块由两部分组成:

第一部分——proc文件系统组件:kernel/dev.c——回应io请求到/dev/fuse。fuse_dev_read()函数负责读出文件,并将来自“list of request”结构体的命令返回到调用函数。fuse_dev_write()负责文件写入,并将写入的数据置放到“req→out”数据结构中。

第二部分——文件系统调用部分:kernel/file.c、kernel/inode.c、kernel/dir.c——调用request_send(),将请求加入到“list of request”结构体中,等待回复(reply)。








两个重要的数据结构 fc, req


/* A Fuse connection. * This structure is created, when the filesystem is mounted, and is destroyed, when the * client device is closed and the filesystem is unmounted. */Struct  fuse_conn {/** Readers of the connection are waiting on this */    wait_queue_head_t waitq; // 等待执行请求的进程的队列    /** The list of pending requests */    struct list_head pending;  // 被挂起的请求 的队列    /** The list of requests being processed */    struct list_head processing; // 正在被处理的请求的 队列/** Pending interrupts */ struct list_head interrupts;  // 执行中被中断的请求的 队列...}


/*
 * A request to the client
 */
struct fuse_req {
    /** Used to wake up the task waiting for completion of request */
    wait_queue_head_t waitq;        /* wait queue of this request */

    /* ... remaining members are omitted in this excerpt ... */
};

fuse通过fuse_session_loop来启动守护程序,守护程序最终会调用fuse_dev_readv,fuse_dev_readv调用request_wait,使得进程在fc的waitq队列上睡眠。


Static  size_t  fuse_dev_readv(struct file *file, const struct iovec *iov,  unsigned long nr_segs, loff_t *off)                              {     ….     request_wait(fc); ….}


* Wait until a request is available on the pending list  *当前进程一直等待,直到挂起队列中有一个请求*/static void request_wait(struct fuse_conn *fc){DECLARE_WAITQUEUE(wait, current);  //定义一个队列节点变量wait,其与当前进程相关联        add_wait_queue_exclusive(&fc->waitq, &wait);  //将wait加入到fc->waitq等待队列中        //不断的检查fc的pending队列及interrupts队列,看是否有请求,没有请求一直while循环         while (fc->connected && !request_pending(fc)) {             set_current_state(TASK_INTERRUPTIBLE);             if (signal_pending(current)) break;             spin_unlock(&fc->lock);             schedule();  //选择一个进程运行             spin_lock(&fc->lock);         }         // 有请求,将进程设为TASK_RUNNING状态(被唤醒,被赋予CPU使用权)        set_current_state(TASK_RUNNING);         remove_wait_queue(&fc->waitq, &wait); // 将wait(当前进程)从等待队列中移除}③// fc的pending队列及interrupts队列,看是否有请求static int request_pending(struct fuse_conn *fc){return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);}

request_send是用户请求经过vfs(如上面的图),再到fuse operation中被调用的,它向/dev/fuse发送请求


void  request_send(struct fuse_conn *fc, struct fuse_req *req){    ……    queue_request(fc, req);  request_wait_answer(fc, req);……}⑤static void queue_request(struct fuse_conn *fc, struct fuse_req *req){    list_add_tail(&req->list, &fc->pending);  //将请求加入到pending队列    req->state = FUSE_REQ_PENDING;   if (!req->waiting) {  req->waiting = 1;  atomic_inc(&fc->num_waiting);    }wake_up(&fc->waitq);  //唤醒等待等列    kill_fasync(&fc->fasync, SIGIO, POLL_IN);}


/* Called with fc->lock held.  Releases, and then reacquires it. *///该调用会在req的waitq上睡眠,fuse守护程序处理完请求后,会将其唤醒static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req){	     if (!fc->no_interrupt) {		    /* Any signal may interrupt this */		    wait_answer_interruptible(fc, req);		   if (req->aborted)			   goto aborted;		   if (req->state == FUSE_REQ_FINISHED)			   return; 		   req->interrupted = 1;		   if (req->state == FUSE_REQ_SENT)			   queue_interrupt(fc, req);	     }	     if (req->force) {		spin_unlock(&fc->lock);		wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);		spin_lock(&fc->lock);	     } else {		   sigset_t oldset;		/* Only fatal signals may interrupt this */		block_sigs(&oldset);		wait_answer_interruptible(fc, req);		restore_sigs(&oldset);	     }	    if (req->aborted)		    goto aborted;	    if (req->state == FUSE_REQ_FINISHED)   return;	req->out.h.error = -EINTR;	req->aborted = 1;aborted:	if (req->locked) {		/* This is uninterruptible sleep, because data is		   being copied to/from the buffers of req.  During		   locked state, there mustn‘t be any filesystem		   operation (e.g. page fault), since that could lead		   to deadlock */		spin_unlock(&fc->lock);		wait_event(req->waitq, !req->locked);		spin_lock(&fc->lock);	}	if (req->state == FUSE_REQ_PENDING) {		list_del(&req->list);		__fuse_put_request(req);	} else if (req->state == FUSE_REQ_SENT) {		spin_unlock(&fc->lock);		wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);		spin_lock(&fc->lock);	}}} (左列七行)fuse守护程序处理完请求,最终通过fuse_dev_writev写回/dev/fuse,它将唤醒相应req中waitq的等待队列元素,从而让文件系统请求完成request_wait_answer,获取到结果。⑦/**Write a single reply to a request.  First the header is copied from the write buffer.  The request is then *searched on the processing list by the unique ID found in the header.  If found, then remove it from the list *and copy the rest of the buffer to the request. 
The request is finished by calling request_end() */static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,  unsigned long nr_segs, loff_t *off)                                                  {   ……..req = request_find(fc, oh.unique);   request_end(fc, req);   ….}
/*
 * Marked ⑧ in the article.
 *
 * This function is called when a request is finished.  Either a reply has
 * arrived or it was aborted (and not yet sent) or some error occurred during
 * communication with userspace, or the device file was closed.  The
 * requester thread is woken up (if still waiting), the 'end' callback is
 * called if given, else the reference to the request is released
 *
 * Called with fc->lock, unlocks it
 *
 * Abbreviated excerpt: only the wake-up is shown.
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
    /* ... elided ... */
    wake_up(&req->waitq);   /* wake up the wait queue on req */
    /* ... elided ... */
}



/*
 * Session main loop: repeatedly receive a request from the session's
 * channel and process it until the session has exited or receiving fails.
 * Per the article, this (or its multithreaded variant) is called from
 * fuse_main.
 *
 * Returns -1 if the read buffer cannot be allocated or the last receive
 * returned a negative value, 0 otherwise.
 */
int fuse_session_loop(struct fuse_session *se)
{
    int res = 0;
    struct fuse_chan *ch = fuse_session_next_chan(se, NULL);
    size_t bufsize = fuse_chan_bufsize(ch);
    char *buf = malloc(bufsize);    /* receive buffer for the channel */

    if (!buf) {
        fprintf(stderr, "fuse: failed to allocate read buffer\n");
        return -1;
    }

    /* fuse daemon, loops */
    while (!fuse_session_exited(se)) {
        struct fuse_chan *tmpch = ch;

        /* Read a request from /dev/fuse; waits until one is available. */
        res = fuse_chan_recv(&tmpch, buf, bufsize);
        if (res == -EINTR)
            continue;
        if (res <= 0)
            break;

        /* Process the request that was just read. */
        fuse_session_process(se, buf, res, tmpch);
    }

    free(buf);
    fuse_session_reset(se);
    return res < 0 ? -1 : 0;
}


②int fuse_chan_recv(struct fuse_chan **chp, char *buf, size_t size){    struct fuse_chan *ch = *chp;    if (ch->compat)        return ((struct fuse_chan_ops_compat24 *) &ch->op)->receive(ch, buf, size);    else        return ch->op.receive(chp, buf, size); //由下面的一段代码可以发现,receive最终是通过// fuse_kern_chan_receive实现的,代码片段3分析该请求}③#define MIN_BUFSIZE 0x21000struct fuse_chan *fuse_kern_chan_new(int fd){    //channel的读写方法    struct fuse_chan_ops op = {        .receive = fuse_kern_chan_receive,        .send = fuse_kern_chan_send,        .destroy = fuse_kern_chan_destroy,};//设置bufsize大小    size_t bufsize = getpagesize() + 0x1000;    bufsize = bufsize < MIN_BUFSIZE ? MIN_BUFSIZE : bufsize;    return fuse_chan_new(&op, fd, bufsize, NULL);}
/*
 * Abbreviated excerpt of fuse_kern_chan_receive: read one request from the
 * channel's file descriptor into buf.  Everything after the read() call is
 * elided or given only as pseudo-code in the article.
 */
static int fuse_kern_chan_receive(struct fuse_chan **chp, char *buf, size_t size)
{
    struct fuse_chan *ch = *chp;
    int err;
    ssize_t res;
    struct fuse_session *se = fuse_chan_session(ch);
    assert(se != NULL);

    /* Keep polling until a request has been read. */
restart:
    /*
     * fuse_chan_fd yields the file descriptor of /dev/fuse; the read system
     * call then reads a request from the device.
     */
    res = read(fuse_chan_fd(ch), buf, size);
    /*
     * According to the file-structure implementation of the fuse device
     * driver (dev.c), read calls fuse_dev_read, which is ultimately
     * implemented through fuse_dev_readv.  According to the comment in that
     * code, fuse_dev_read does the following:
     *
     *   Read a single request into the userspace filesystem's buffer.  This
     *   function waits until a request is available, then removes it from
     *   the pending list and copies request data to userspace buffer.
     *
     * fuse_dev_read in turn calls request_wait, which puts the process to
     * sleep on fc->waitq.
     */

    /* Pseudo-code in the article: "if no data: goto restart" */
    /* ... elided ... */
}

以上的分析对应了fuse filesystem daemon做的第一部分工作。当用户从控制台输入"rm /mnt/fuse/file"时,通过VFS(sys_unlink),再到fuse(dir.c中实现的inode_operations、file.c中实现的file_operations中的方法都会最终调用request_send,后面会讲到),这个请求最终被发到了/dev/fuse中,该请求的到达会唤醒正在等待的fuse守护程序,fuse守护程序读取该请求并进行处理,接下来介绍处理请求所作的工作。


⑤struct fuse_session *fuse_lowlevel_new_common(struct fuse_args *args,                                       const struct fuse_lowlevel_ops *op,                                       size_t op_size, void *userdata){//fuse_lowlevel_ops在之前的文章http://blog.chinaunix.net/u2/87570/showart_2166461.html中已经介绍//过了,开发者实现了fuse_lowlevel_ops并传递给fuse_lowlevel_common    struct fuse_ll *f;    struct fuse_session *se;struct fuse_session_ops sop = {    //最终调用的处理方法        .process = fuse_ll_process, //分析见代码片段5        .destroy = fuse_ll_destroy,    };  …….}⑥static void fuse_ll_process(void *data, const char *buf, size_t len, struct fuse_chan *ch){    struct fuse_ll *f = (struct fuse_ll *) data;    struct fuse_in_header *in = (struct fuse_in_header *) buf;    const void *inarg = buf + sizeof(struct fuse_in_header);struct fuse_req *req;    //创建并初始化一个请求    req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req));    if (req == NULL) {        fprintf(stderr, "fuse: failed to allocate request\n");        return;    }    req->f = f;req->unique = in->unique;……//根据opcode调用fuse_ll_ops中相应的方法,fuse_ll_ops的介绍// http://blog.chinaunix.net/u2/87570/showart_2166461.html    fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg);    }}


以上代码对应流程中perform unlink的工作,实际上就是执行开发者实现的一组方法来完成相关的工作,接下来就是把执行完请求后需要的数据返回,最终是通过send_reply实现的。


⑦static int send_reply(fuse_req_t req, int error, const void *arg, size_t argsize){    struct iovec iov[2];    int count = 1;    if (argsize) {        iov[1].iov_base = (void *) arg;        iov[1].iov_len = argsize;        count++;    }    return send_reply_iov(req, error, iov, count);}⑧static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, int count){    ……    res = fuse_chan_send(req->ch, iov, count);    free_req(req);    return res;}⑨static int fuse_kern_chan_send(struct fuse_chan *ch, const struct iovec iov[],  size_t count){    if (iov) {        //将数据写到/dev/fuse上,最终会调用fuse_dev_write        ssize_t res = writev(fuse_chan_fd(ch), iov, count);    ……    return 0;}




/*
 * Send a request and wait for its answer.
 *
 * Takes fc->lock for the whole operation.  If the connection is not
 * connected, or has a connection error, the matching error code is stored
 * in req->out.h.error and nothing is queued.
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
    req->isreply = 1;
    spin_lock(&fc->lock);
    if (!fc->connected)
        req->out.h.error = -ENOTCONN;
    else if (fc->conn_error)
        req->out.h.error = -ECONNREFUSED;
    else {
        /* Add the request to the request queue. */
        queue_request(fc, req);
        /* acquire extra reference, since request is still needed after request_end() */
        __fuse_get_request(req);
        /* Wait for the result. */
        request_wait_answer(fc, req);
    }
    spin_unlock(&fc->lock);
}