首页 > 代码库 > 写文件的流程
写文件的流程
许多文件系统都是通过generic_file_write()函数来实现文件对象的write方法,即write(库函数)->sys_write()->generic_file_write():
/*
 * write() implementation shared by many filesystems.
 *
 * Wraps the single user buffer in a one-element iovec and hands it to
 * __generic_file_write_nolock() while holding inode->i_sem.  If bytes were
 * written and the file is open with O_SYNC (or IS_SYNC(inode) is true),
 * sync_page_range() is then called on the byte range that was just written.
 *
 * @file:  file being written
 * @buf:   user-space source buffer
 * @count: number of bytes requested
 * @ppos:  file position; read after the write to locate the written range
 *
 * Returns the value from __generic_file_write_nolock(), or the negative
 * value returned by sync_page_range() if that call fails.
 */
ssize_t generic_file_write(struct file *file, const char __user *buf,
			   size_t count, loff_t *ppos)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct iovec vec = {
		.iov_base = (void __user *)buf,
		.iov_len = count,
	};
	ssize_t nwritten;
	ssize_t sync_err;

	down(&inode->i_sem);
	nwritten = __generic_file_write_nolock(file, &vec, 1, ppos);
	up(&inode->i_sem);

	/* Nothing written (or an error): no range to sync. */
	if (nwritten <= 0)
		return nwritten;

	/* Synchronous semantics were not requested. */
	if (!(file->f_flags & O_SYNC) && !IS_SYNC(inode))
		return nwritten;

	/* The written range starts nwritten bytes before the new position. */
	sync_err = sync_page_range(inode, mapping, *ppos - nwritten, nwritten);
	return sync_err < 0 ? sync_err : nwritten;
}
generic_file_write会调用__generic_file_write_nolock(),即write(库函数)->sys_write()->generic_file_write()->__generic_file_write_nolock:
/*
 * Synchronous front end for __generic_file_aio_write_nolock().
 *
 * Initialises an on-stack kiocb for @file with init_sync_kiocb(), issues the
 * write through the aio entry point and, if that reports -EIOCBQUEUED,
 * obtains the final result from wait_on_sync_kiocb().
 *
 * This function takes no lock itself.
 *
 * Returns whatever the aio write (or the subsequent wait) returned.
 */
ssize_t __generic_file_write_nolock(struct file *file, const struct iovec *iov,
				    unsigned long nr_segs, loff_t *ppos)
{
	struct kiocb sync_iocb;
	ssize_t result;

	init_sync_kiocb(&sync_iocb, file);

	result = __generic_file_aio_write_nolock(&sync_iocb, iov, nr_segs, ppos);
	if (result != -EIOCBQUEUED)
		return result;

	/* The request was queued: the result comes from the wait instead. */
	return wait_on_sync_kiocb(&sync_iocb);
}
write(库函数)->sys_write()->generic_file_write()->__generic_file_write_nolock()->__generic_file_aio_write_nolock():
/*
 * Validate a vectored write request and dispatch it to the direct-I/O
 * and/or buffered write path.  This function takes no lock itself.
 *
 * Steps, in order:
 *  1. Walk the iovec array, accumulating the total length in ocount and
 *     checking every segment with access_ok(VERIFY_READ, ...).
 *  2. Point current->backing_dev_info at the mapping's backing device for
 *     the duration of the write (cleared again at "out").
 *  3. Call generic_write_checks(), which receives pointers to pos and count
 *     and so may adjust them; then remove_suid() and inode_update_time().
 *  4. For O_DIRECT files call generic_file_direct_write(); if that neither
 *     fails nor writes everything, the remainder goes through
 *     generic_file_buffered_write().
 *
 * @iocb:    I/O control block; iocb->ki_filp is the file being written
 * @iov:     array of user-space segments
 * @nr_segs: number of entries in @iov
 * @ppos:    file position, read here and passed on to the write helpers
 *
 * Returns "written" when it is non-zero, otherwise "err".
 */
ssize_t __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
{
	struct file *file = iocb->ki_filp;
	struct address_space * mapping = file->f_mapping;
	size_t ocount;		/* original count */
	size_t count;		/* after file limit checks */
	struct inode *inode = mapping->host;
	unsigned long seg;
	loff_t pos;
	ssize_t written;
	ssize_t err;

	ocount = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		/* A bad first segment means nothing at all can be written. */
		if (seg == 0)
			return -EFAULT;
		/* Otherwise keep only the segments before the bad one. */
		nr_segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *ppos;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;
	written = 0;

	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	/* Nothing left to write after the checks: return err (0 here). */
	if (count == 0)
		goto out;

	err = remove_suid(file->f_dentry);
	if (err)
		goto out;

	inode_update_time(inode, 1);

	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
	if (unlikely(file->f_flags & O_DIRECT)) {
		written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, count, ocount);
		/* Done on error, or when direct I/O wrote everything. */
		if (written < 0 || written == count)
			goto out;
		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		pos += written;
		count -= written;
	}

	/* "written" carries over any bytes already written by direct I/O. */
	written = generic_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count, written);
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
write(库函数)->sys_write()->generic_file_write()->__generic_file_write_nolock()->__generic_file_aio_write_nolock()->generic_file_buffered_write():
ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, loff_t *ppos, size_t count, ssize_t written) { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; struct address_space_operations *a_ops = mapping->a_ops; struct inode *inode = mapping->host; long status = 0; struct page *page; struct page *cached_page = NULL; size_t bytes; struct pagevec lru_pvec; const struct iovec *cur_iov = iov; /* current iovec */ size_t iov_base = 0; /* offset in the current iovec */ char __user *buf; pagevec_init(&lru_pvec, 0); buf = iov->iov_base + written; /* handle partial DIO write */ do { unsigned long index; unsigned long offset; size_t copied; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ //获取要写的缓冲页面索引(如何根据页索引在radix树中获取到指定页描述符,ULK-PAGE600) index = pos >> PAGE_CACHE_SHIFT; bytes = PAGE_CACHE_SIZE - offset; //最后剩一点写入内容的处理 if (bytes > count) bytes = count; /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. */ fault_in_pages_readable(buf, bytes); //在radix树里面查找要被写的page,如果不存在则创建一个,见下面分析 page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); if (!page) { status = -ENOMEM; break; } //为这个page准备一组buffer_head结构,用于描述组成这个page的数据块,见下面分析 status = a_ops->prepare_write(file, page, offset, offset+bytes); if (unlikely(status)) { loff_t isize = i_size_read(inode); /* * prepare_write() may have instantiated a few blocks * outside i_size. Trim these off again. 
*/ unlock_page(page); page_cache_release(page); if (pos + bytes > isize) vmtruncate(inode, isize); break; } if (likely(nr_segs == 1)) copied = (page, offset, buf, bytes); else copied = filemap_copy_from_user_iovec(page, offset, cur_iov, iov_base, bytes); flush_dcache_page(page); //把基础缓冲区标记为脏,以便随后把他们都写到磁盘。 status = a_ops->commit_write(file, page, offset, offset+bytes); if (likely(copied > 0)) { if (!status) status = copied; if (status >= 0) { written += status; count -= status; pos += status; buf += status; if (unlikely(nr_segs > 1)) filemap_set_next_iovec(&cur_iov, &iov_base, status); } } if (unlikely(copied != bytes)) if (status >= 0) status = -EFAULT; unlock_page(page); mark_page_accessed(page); page_cache_release(page); if (status < 0) break; balance_dirty_pages_ratelimited(mapping); cond_resched(); } while (count); *ppos = pos; if (cached_page) page_cache_release(cached_page); /* * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC */ if (likely(status >= 0)) { if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (!a_ops->writepage || !is_sync_kiocb(iocb)) status = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); } } /* * If we get here for O_DIRECT writes then we must have fallen through * to buffered writes (block instantiation inside i_size). So we sync * the file data here, to try to honour O_DIRECT expectations. */ if (unlikely(file->f_flags & O_DIRECT) && written) status = filemap_write_and_wait(mapping); pagevec_lru_add(&lru_pvec); return written ? 
written : status; } static inline struct page * __grab_cache_page(struct address_space *mapping, unsigned long index, struct page **cached_page, struct pagevec *lru_pvec) { int err; struct page *page; repeat: //根据address_space地址和缓冲页的索引,获取缓冲页面的描述符(ULK-PAGE602) page = find_lock_page(mapping, index); if (!page) { if (!*cached_page) { *cached_page = page_cache_alloc(mapping); if (!*cached_page) return NULL; } //把一个新页的描述符插入到页高速缓存--在radix树中出入新节点 err = add_to_page_cache(*cached_page, mapping, index, GFP_KERNEL); if (err == -EEXIST) goto repeat; if (err == 0) { page = *cached_page; page_cache_get(page); if (!pagevec_add(lru_pvec, page)) __pagevec_lru_add(lru_pvec); *cached_page = NULL; } } return page; }
//prepare_write分析
address_space对象的prepare_write和commit_write方法专用于由generic_file_write()实现的通用写操作,这个函数适用于普通文件和块设备文件。每个磁盘文件系统都定义了自己的prepare_write方法。与读操作类似,这个方法只是普通函数的封装。例如,Ext2文件系统通过下列函数实现prepare_write方法
//ext2_prepare_write()定义在fs/ext2/inode.c中,它所调用的block_prepare_write()定义在fs/buffer.c中
/*
 * Ext2's prepare_write method: a thin wrapper that forwards to
 * block_prepare_write(), supplying ext2_get_block as the callback.
 *
 * @file: not used by this wrapper
 * @page: page about to be written
 * @from: first argument of the range, passed through unchanged
 * @to:   second argument of the range, passed through unchanged
 *
 * Returns whatever block_prepare_write() returns.
 */
int ext2_prepare_write(struct file *file, struct page *page,
		       unsigned from, unsigned to)
{
	int rc;

	rc = block_prepare_write(page, from, to, ext2_get_block);
	return rc;
}
一旦prepare_write方法返回,generic_file_write()函数就用存放在用户地址空间中的数据更新高速缓存页面。接下来,调用address_space对象的commit_write方法。这个方法由generic_commit_write()函数实现。generic_commit_write()函数执行如下步骤:
1.调用__block_commit_write()函数,执行如下步骤:
A.考虑页中受写操作影响的所有缓冲区;对于其中的每个缓冲区,将对应缓冲区首部的BH_Uptodate和BH_Dirty标志置位。
B.标记相应索引节点为脏,并将该索引节点加入超级块的脏索引节点链表
C.如果缓冲区页中的所有缓冲区是最新的,则将PG_uptodate标志置位
D.将页的PG_dirty标志置位,并在基树中将页标记成脏
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。