--- linux-2.6.18.2/fs/ext2/file.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/ext2/file.c 2006-09-25 15:40:02 +0200 @@ -53,6 +53,7 @@ const struct file_operations ext2_file_o .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; --- linux-2.6.18.2/fs/ext3/file.c 2006-06-18 04:54:33 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/ext3/file.c 2006-09-25 15:40:02 +0200 @@ -119,6 +119,7 @@ const struct file_operations ext3_file_o .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; --- linux-2.6.18.2/fs/jfs/file.c 2006-06-18 04:54:36 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/jfs/file.c 2006-09-25 15:40:02 +0200 @@ -111,6 +112,7 @@ const struct file_operations jfs_file_op .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .fsync = jfs_fsync, .release = jfs_release, .ioctl = jfs_ioctl, --- linux-2.6.18.2/fs/namei.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/namei.c 2006-11-04 08:24:09 +0100 @@ -1503,6 +1557,14 @@ int may_open(struct nameidata *nd, int a if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; +#ifdef CONFIG_VSERVER_COWBL + if (IS_COW(inode) && (flag & FMODE_WRITE)) { + if (IS_COW_LINK(inode)) + return -EMLINK; + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + mark_inode_dirty(inode); + } +#endif error = vfs_permission(nd, acc_mode); if (error) return error; --- linux-2.6.18.2/fs/namei.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/namei.c 2006-11-04 08:24:09 +0100 @@ -1589,6 +1652,11 @@ int open_namei(int dfd, const char *path struct dentry *dir; int count = 0; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; 
+restart:
+#endif
 	acc_mode = ACC_MODE(flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
--- linux-2.6.18.2/fs/namei.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/namei.c 2006-11-04 08:24:09 +0100
@@ -1690,6 +1758,22 @@ do_last:
 		goto exit;
 ok:
 	error = may_open(nd, acc_mode, flag);
+#ifdef CONFIG_VSERVER_COWBL
+	if (error == -EMLINK) {
+		struct dentry *dentry;
+		dentry = cow_break_link(pathname);
+		if (IS_ERR(dentry)) {
+			error = PTR_ERR(dentry);
+			goto exit;
+		}
+		dput(dentry);
+		release_open_intent(nd);
+		path_release(nd);
+		flag = rflag;
+		mode = rmode;
+		goto restart;
+	}
+#endif
 	if (error)
 		goto exit;
 	return 0;
--- linux-2.6.18.2/fs/namei.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/namei.c 2006-11-04 08:24:09 +0100
@@ -2591,6 +2688,177 @@ int vfs_follow_link(struct nameidata *nd
 	return __vfs_follow_link(nd, link);
 }
+
+#ifdef CONFIG_VSERVER_COWBL
+
+#include <linux/file.h>
+
+/*
+ * cow_break_link - replace the file at @pathname with a private copy
+ *
+ * Resolves @pathname (following symlinks), creates a temporary file named
+ * after the resolved path plus one padding byte, copies the data into it
+ * with vfs_sendfile() and renames the copy over the original dentry.
+ *
+ * Returns the dentry of the copy with an extra reference that the caller
+ * has to dput(), or an ERR_PTR(): the error of the allocation, lookup,
+ * create or open step that failed, and -EMLINK if the temporary names
+ * ran out or the copy/rename itself did not succeed.
+ *
+ * NOTE(review): on failure after vfs_create() the temporary file is left
+ * behind; nothing here unlinks it.
+ */
+struct dentry *cow_break_link(const char *pathname)
+{
+	int ret, mode, pathlen;
+	struct nameidata old_nd, dir_nd;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *res = ERR_PTR(-EMLINK);
+	struct vfsmount *old_mnt, *new_mnt;
+	struct file *old_file;
+	struct file *new_file;
+	char *to, *path, pad='\251';
+	loff_t size;
+
+	vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
+	path = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
+	vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
+	if (ret < 0) {
+		/* the lookup failed, so old_nd must not be released */
+		res = ERR_PTR(ret);
+		goto out_free_path;
+	}
+	old_dentry = old_nd.dentry;
+	old_mnt = old_nd.mnt;
+	mode = old_dentry->d_inode->i_mode;
+
+	/* two bytes of the buffer stay reserved for the pad byte and NUL */
+	to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
+	if (IS_ERR(to)) {
+		res = ERR_PTR(PTR_ERR(to));
+		goto out_rel_old;
+	}
+	pathlen = strlen(to);
+	vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to);
+
+	to[pathlen+1] = 0;
+retry:
+	/* try the next candidate name; give up once the pad range is used up */
+	to[pathlen] = pad--;
+	if (pad <= '\240')
+		goto out_rel_old;
+
+	vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
+	ret = path_lookup(to,
+		LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd);
+	if (ret < 0) {
+		res = ERR_PTR(ret);
+		goto out_rel_old;
+	}
+
+	/* this puppy downs the inode sem */
+	new_dentry = lookup_create(&dir_nd, 0);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"lookup_create(new): %p", new_dentry);
+	if (IS_ERR(new_dentry)) {
+		/* failure comes back as ERR_PTR with the mutex still held */
+		res = new_dentry;
+		goto out_unlock_new;
+	}
+
+	ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"vfs_create(new): %d", ret);
+	if (ret == -EEXIST) {
+
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		dput(new_dentry);
+		path_release(&dir_nd);
+		goto retry;
+	}
+	if (ret < 0) {
+		res = ERR_PTR(ret);
+		goto out_rel_both;
+	}
+
+	new_mnt = dir_nd.mnt;
+
+	dget(old_dentry);
+	mntget(old_mnt);
+	/* this one cleans up the dentry in case of failure */
+	old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(old): %p", old_file);
+	if (IS_ERR(old_file)) {
+		res = ERR_PTR(PTR_ERR(old_file));
+		goto out_rel_both;
+	}
+
+	dget(new_dentry);
+	mntget(new_mnt);
+	/* this one cleans up the dentry in case of failure */
+	new_file = dentry_open(new_dentry, new_mnt, O_WRONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(new): %p", new_file);
+	if (IS_ERR(new_file)) {
+		res = ERR_PTR(PTR_ERR(new_file));
+		goto out_fput_old;
+	}
+
+	/*
+	 * vfs_sendfile() takes a size_t count and may move less than was
+	 * asked for, so keep going until the whole file has been copied.
+	 */
+	size = i_size_read(old_file->f_dentry->d_inode);
+	while (size > 0) {
+		size_t chunk = (size > INT_MAX) ? INT_MAX : size;
+		ssize_t sent;
+
+		sent = vfs_sendfile(new_file, old_file, NULL, chunk, 0);
+		vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", (int)sent);
+		if (sent <= 0)
+			goto out_fput_both;
+		size -= sent;
+	}
+
+	ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
+		old_nd.dentry->d_parent->d_inode, old_dentry);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
+	if (!ret) {
+		res = new_dentry;
+		dget(new_dentry);
+	}
+
+out_fput_both:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(new_file=%p[#%d])", new_file,
+		atomic_read(&new_file->f_count));
+	fput(new_file);
+
+out_fput_old:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(old_file=%p[#%d])", old_file,
+		atomic_read(&old_file->f_count));
+	fput(old_file);
+
+out_rel_both:
+	dput(new_dentry);
+out_unlock_new:
+	mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+	path_release(&dir_nd);
+out_rel_old:
+	path_release(&old_nd);
+out_free_path:
+	kfree(path);
+	return res;
+}
+
+#endif
+
 
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page
**ppage) { --- linux-2.6.18.2/fs/open.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/open.c 2006-11-04 08:24:09 +0100 @@ -433,11 +441,11 @@ long do_utimes(int dfd, char __user *fil if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto dput_and_out; + inode = nd.dentry->d_inode; /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; --- linux-2.6.18.2/fs/open.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/open.c 2006-11-04 08:24:09 +0100 @@ -666,11 +675,11 @@ asmlinkage long sys_fchmodat(int dfd, co error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto dput_and_out; + inode = nd.dentry->d_inode; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) --- linux-2.6.18.2/fs/open.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/open.c 2006-11-04 08:24:09 +0100 @@ -737,7 +747,11 @@ asmlinkage long sys_chown(const char __u error = user_path_walk(filename, &nd); if (!error) { - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); } return error; --- linux-2.6.18.2/fs/open.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/open.c 2006-11-04 08:24:09 +0100 @@ -756,7 +770,11 @@ asmlinkage long sys_fchownat(int dfd, co follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
0 : LOOKUP_FOLLOW;
 	error = __user_walk_fd(dfd, filename, follow, &nd);
 	if (!error) {
-		error = chown_common(nd.dentry, user, group);
+#ifdef CONFIG_VSERVER_COWBL
+		error = cow_check_and_break(&nd);
+		if (!error)
+#endif
+		error = chown_common(nd.dentry, nd.mnt, user, group);
 		path_release(&nd);
 	}
 out:
--- linux-2.6.18.2/fs/open.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/open.c 2006-11-04 08:24:09 +0100
@@ -770,7 +788,11 @@ asmlinkage long sys_lchown(const char __
 
 	error = user_path_walk_link(filename, &nd);
 	if (!error) {
-		error = chown_common(nd.dentry, user, group);
+#ifdef CONFIG_VSERVER_COWBL
+		error = cow_check_and_break(&nd);
+		if (!error)
+#endif
+		error = chown_common(nd.dentry, nd.mnt, user, group);
 		path_release(&nd);
 	}
 	return error;
--- linux-2.6.18.2/fs/read_write.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/read_write.c 2006-09-25 15:40:02 +0200
@@ -636,6 +636,73 @@ sys_writev(unsigned long fd, const struc
 	return ret;
 }
 
+/*
+ * Send up to @count bytes from @in_file to @out_file by calling the input
+ * file's ->sendfile() with file_send_actor; the output file must provide
+ * ->sendpage().  A NULL @ppos means in_file->f_pos, a zero @max means the
+ * smaller s_maxbytes of both superblocks.  Returns the ->sendfile() result
+ * or a negative errno (-EOVERFLOW if *@ppos ends up beyond @max).
+ */
+ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos,
+		size_t count, loff_t max)
+{
+	struct inode *src, *dst;
+	loff_t start;
+	ssize_t ret;
+
+	/* input side: needs an inode and a ->sendfile() method */
+	src = in_file->f_dentry->d_inode;
+	if (!src || !in_file->f_op || !in_file->f_op->sendfile)
+		return -EINVAL;
+
+	if (ppos) {
+		if (!(in_file->f_mode & FMODE_PREAD))
+			return -ESPIPE;
+	} else {
+		ppos = &in_file->f_pos;
+	}
+
+	ret = rw_verify_area(READ, in_file, ppos, count);
+	if (ret < 0)
+		return ret;
+	count = ret;
+
+	/* output side: needs an inode and a ->sendpage() method */
+	dst = out_file->f_dentry->d_inode;
+	if (!dst || !out_file->f_op || !out_file->f_op->sendpage)
+		return -EINVAL;
+
+	ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
+	if (ret < 0)
+		return ret;
+	count = ret;
+
+	ret = security_file_permission (out_file, MAY_WRITE);
+	if (ret)
+		return ret;
+
+	if (!max)
+		max = min(src->i_sb->s_maxbytes, dst->i_sb->s_maxbytes);
+
+	start = *ppos;
+	if (unlikely(start < 0))
+		return -EINVAL;
+	if (unlikely(start + count > max)) {
+		if (start >= max)
+			return -EOVERFLOW;
+		count = max - start;
+	}
+
+	ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+	if (ret > 0) {
+		current->rchar += ret;
+		current->wchar += ret;
+	}
+
+	return (*ppos > max) ? -EOVERFLOW : ret;
+}
+
+EXPORT_SYMBOL(vfs_sendfile);
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
--- linux-2.6.18.2/fs/read_write.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/read_write.c 2006-09-25 15:40:02 +0200
@@ -640,8 +707,6 @@ *****
 			   size_t count, loff_t max)
 {
 	struct file * in_file, * out_file;
-	struct inode * in_inode, * out_inode;
-	loff_t pos;
 	ssize_t retval;
 	int fput_needed_in, fput_needed_out;
 
--- linux-2.6.18.2/fs/read_write.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/read_write.c 2006-09-25 15:40:02 +0200
@@ -654,22 +719,6 @@ static ssize_t do_sendfile(int out_fd, i
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
-	retval = -EINVAL;
-	in_inode = in_file->f_dentry->d_inode;
-	if (!in_inode)
-		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->sendfile)
-		goto fput_in;
-	retval = -ESPIPE;
-	if (!ppos)
-		ppos = &in_file->f_pos;
-	else
-		if (!(in_file->f_mode & FMODE_PREAD))
-			goto fput_in;
-	retval = rw_verify_area(READ, in_file, ppos, count);
-	if (retval < 0)
-		goto fput_in;
-	count = retval;
 
 	retval = security_file_permission (in_file, MAY_READ);
 	if (retval)
--- linux-2.6.18.2/fs/read_write.c 2006-09-20 16:58:35 +0200
+++ linux-2.6.18.2-vs2.1.1/fs/read_write.c 2006-09-25 15:40:02 +0200
@@ -684,39 +733,9 @@ static ssize_t do_sendfile(int out_fd, i
 		goto fput_in;
 	if (!(out_file->f_mode & FMODE_WRITE))
 		goto fput_out;
-	retval = -EINVAL;
-	if (!out_file->f_op || !out_file->f_op->sendpage)
-		goto fput_out;
-	out_inode = out_file->f_dentry->d_inode;
-	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
-	if (retval < 0)
-
goto fput_out; - count = retval; - - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - - if (!max) - max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); - - pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; - if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; - if (pos >= max) - goto fput_out; - count = max - pos; - } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + retval = vfs_sendfile(out_file, in_file, ppos, count, max); - if (retval > 0) { - current->rchar += retval; - current->wchar += retval; - } current->syscr++; current->syscw++; --- linux-2.6.18.2/fs/read_write.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/read_write.c 2006-09-25 15:40:02 +0200 @@ -720,9 +739,6 @@ ***** current->syscr++; current->syscw++; - if (*ppos > max) - retval = -EOVERFLOW; - fput_out: fput_light(out_file, fput_needed_out); fput_in: --- linux-2.6.18.2/fs/reiserfs/file.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/reiserfs/file.c 2006-09-25 15:40:02 +0200 @@ -1572,6 +1572,7 @@ const struct file_operations reiserfs_fi .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, --- linux-2.6.18.2/fs/xfs/linux-2.6/xfs_file.c 2006-09-20 16:58:37 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/xfs/linux-2.6/xfs_file.c 2006-09-25 15:40:02 +0200 @@ -545,6 +545,7 @@ const struct file_operations xfs_file_op .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read, .splice_write = xfs_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, --- linux-2.6.18.2/fs/xfs/linux-2.6/xfs_file.c 2006-09-20 16:58:37 +0200 +++ linux-2.6.18.2-vs2.1.1/fs/xfs/linux-2.6/xfs_file.c 
2006-09-25 15:40:02 +0200 @@ -570,6 +571,7 @@ const struct file_operations xfs_invis_f .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, .sendfile = xfs_file_sendfile_invis, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read_invis, .splice_write = xfs_file_splice_write_invis, .unlocked_ioctl = xfs_file_ioctl_invis, --- linux-2.6.18.2/include/linux/fs.h 2006-09-20 16:58:43 +0200 +++ linux-2.6.18.2-vs2.1.1/include/linux/fs.h 2006-11-01 01:01:33 +0100 @@ -178,6 +186,14 @@ ***** #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#ifdef CONFIG_VSERVER_COWBL +# define IS_COW(inode) (IS_IUNLINK(inode) && IS_IMMUTABLE(inode)) +# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) +#else +# define IS_COW(inode) (0) +# define IS_COW_LINK(inode) (0) +#endif + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. 
*/ --- linux-2.6.18.2/include/linux/fs.h 2006-09-20 16:58:43 +0200 +++ linux-2.6.18.2-vs2.1.1/include/linux/fs.h 2006-11-01 01:01:33 +0100 @@ -1116,6 +1144,7 @@ extern ssize_t vfs_readv(struct file *, unsigned long, loff_t *); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called --- linux-2.6.18.2/include/linux/fs.h 2006-09-20 16:58:43 +0200 +++ linux-2.6.18.2-vs2.1.1/include/linux/fs.h 2006-11-01 01:01:33 +0100 @@ -1657,6 +1686,7 @@ extern ssize_t do_sync_write(struct file ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); --- linux-2.6.18.2/include/linux/syscalls.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.18.2-vs2.1.1/include/linux/syscalls.h 2006-09-25 15:40:02 +0200 @@ -293,6 +293,8 @@ asmlinkage long sys_symlink(const char _ asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_rename(const char __user *oldname, const char __user *newname); +asmlinkage long sys_copyfile(const char __user *from, const char __user *to, + umode_t mode); asmlinkage long sys_chmod(const char __user *filename, mode_t mode); asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); --- linux-2.6.18.2/mm/filemap.c 2006-09-20 16:58:44 +0200 +++ linux-2.6.18.2-vs2.1.1/mm/filemap.c 2006-10-25 03:39:09 +0200 @@ -1253,6 +1253,31 @@ int file_send_actor(read_descriptor_t * return written; } +/* FIXME: It would be as simple as this, if we had a (void __user*) to write. 
+ * We already have a kernel buffer, so it should be even simpler, right? ;)
+ *
+ * Yes, sorta. After duplicating the complete path of generic_file_write(),
+ * at least some special cases could be removed, so the copy is simpler than
+ * the original. But it remains a copy, so overall complexity increases.
+ */
+static ssize_t
+generic_kernel_file_write(struct file *, const char *, size_t, loff_t *);
+
+/* Write @size bytes at @offset in @page to @file at *@ppos; @more is unused */
+ssize_t generic_file_sendpage(struct file *file, struct page *page,
+		int offset, size_t size, loff_t *ppos, int more)
+{
+	char *kaddr = kmap(page);
+	ssize_t ret;
+
+	ret = generic_kernel_file_write(file, kaddr + offset, size, ppos);
+	kunmap(page);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(generic_file_sendpage);
+
 ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
 			 size_t count, read_actor_t actor, void *target)
 {
--- linux-2.6.18.2/mm/filemap.c 2006-09-20 16:58:44 +0200
+++ linux-2.6.18.2-vs2.1.1/mm/filemap.c 2006-10-25 03:39:09 +0200
@@ -1916,6 +1941,19 @@ int remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(remove_suid);
 
+/* Copy @bytes from kernel buffer @buf into @page at @offset; returns @bytes */
+static inline size_t
+filemap_copy_from_kernel(struct page *page, unsigned long offset,
+			const char *buf, unsigned bytes)
+{
+	char *kaddr = kmap(page);
+
+	memcpy(kaddr + offset, buf, bytes);
+	kunmap(page);
+
+	return bytes;
+}
+
 size_t
 __filemap_copy_from_user_iovec_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
--- linux-2.6.18.2/mm/filemap.c 2006-09-20 16:58:44 +0200
+++ linux-2.6.18.2-vs2.1.1/mm/filemap.c 2006-10-25 03:39:09 +0200
@@ -2296,6 +2334,177 @@ out:
 }
 EXPORT_SYMBOL(generic_file_aio_write_nolock);
 
+/* Advance (*iovp, *basep) by @bytes, stepping over exhausted segments */
+static inline void
+filemap_set_next_kvec(const struct kvec **iovp, size_t *basep, size_t bytes)
+{
+	const struct kvec *iov = *iovp;
+	size_t base = *basep;
+
+	while (bytes) {
+		/* size_t, not int: both operands of min() are size_t */
+		size_t copy = min(bytes, iov->iov_len - base);
+
+		bytes -= copy;
+		base += copy;
+		if (iov->iov_len == base) {
+			iov++;
+			base = 0;
+		}
+	}
+	*iovp = iov;
+	*basep = base;
+}
+
+/*
+ * TODO:
+ * This
largely tries to copy generic_file_aio_write_nolock(), although it + * doesn't have to be nearly as generic. A real cleanup should either + * merge this into generic_file_aio_write_nolock() as well or keep it special + * and remove as much code as possible. + * + * Writes the kernel-space data described by @iov into the page cache of + * iocb->ki_filp, starting at *@ppos, one page at a time through the + * mapping's prepare_write()/commit_write(). Only a single segment is + * supported (BUG_ON(nr_segs != 1)) and O_DIRECT files hit a BUG_ON(). + * *@ppos is set to the position reached. Returns the number of bytes + * written if any were written, otherwise the last status or error. + */ +static ssize_t +generic_kernel_file_aio_write_nolock(struct kiocb *iocb, const struct kvec*iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct file *file = iocb->ki_filp; + struct address_space * mapping = file->f_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + size_t ocount; /* original count */ + size_t count; /* after file limit checks */ + struct inode *inode = mapping->host; + long status = 0; /* result of the last page operation */ + loff_t pos; + struct page *page; + struct page *cached_page = NULL; + const int isblk = S_ISBLK(inode->i_mode); + ssize_t written; + ssize_t err; + size_t bytes; + struct pagevec lru_pvec; + const struct kvec *cur_iov = iov; /* current kvec */ + size_t iov_base = 0; /* offset in the current kvec */ + unsigned long seg; + char *buf; /* read position inside the segment's data */ + + ocount = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct kvec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. 
+ */ + ocount += iv->iov_len; + if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) + return -EINVAL; + } + + count = ocount; + pos = *ppos; + pagevec_init(&lru_pvec, 0); + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + written = 0; + + err = generic_write_checks(file, &pos, &count, isblk); + if (err) + goto out; + + + if (count == 0) + goto out; /* nothing to write; err is 0 at this point */ + + remove_suid(file->f_dentry); + file_update_time(file); + + /* There is no sane reason to use O_DIRECT */ + BUG_ON(file->f_flags & O_DIRECT); + + buf = iov->iov_base; /* data is taken from the first segment only */ + do { + unsigned long index; + unsigned long offset; + size_t copied; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; /* room left in this page */ + if (bytes > count) + bytes = count; + + page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); + if (!page) { + status = -ENOMEM; + break; + } + + status = a_ops->prepare_write(file, page, offset, offset+bytes); + if (unlikely(status)) { + loff_t isize = i_size_read(inode); + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. 
+ */ + unlock_page(page); + page_cache_release(page); + if (pos + bytes > isize) + vmtruncate(inode, isize); + break; + } + + BUG_ON(nr_segs != 1); /* NOTE(review): this makes the nr_segs > 1 branch below unreachable */ + copied = filemap_copy_from_kernel(page, offset, buf, bytes); + + flush_dcache_page(page); + status = a_ops->commit_write(file, page, offset, offset+bytes); + if (likely(copied > 0)) { + if (!status) + status = copied; /* commit succeeded: count the bytes copied */ + + if (status >= 0) { + written += status; + count -= status; + pos += status; + buf += status; + if (unlikely(nr_segs > 1)) + filemap_set_next_kvec(&cur_iov, + &iov_base, status); + } + } + if (unlikely(copied != bytes)) + if (status >= 0) + status = -EFAULT; + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + if (status < 0) + break; + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } while (count); + *ppos = pos; + + if (cached_page) + page_cache_release(cached_page); + + /* + * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + */ + if (status >= 0) { + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) + status = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + } + + err = written ? 
written : status; +out: + pagevec_lru_add(&lru_pvec); + current->backing_dev_info = 0; /* clear the hint set before the write loop */ + return err; +} + ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) --- linux-2.6.18.2/mm/filemap.c 2006-09-20 16:58:44 +0200 +++ linux-2.6.18.2-vs2.1.1/mm/filemap.c 2006-10-25 03:39:09 +0200 @@ -2345,6 +2552,21 @@ generic_file_write_nolock(struct file *f ret = wait_on_sync_kiocb(&kiocb); return ret; } + +static ssize_t +generic_kernel_file_write_nolock(struct file *file, const struct kvec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + /* Synchronous wrapper: runs the aio variant on an on-stack kiocb and waits if the request was queued. */ struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, file); + ret = generic_kernel_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + return ret; +} /* NOTE(review): the EXPORT_SYMBOL(generic_file_write_nolock) context line that follows now sits behind this static function instead of the function it exports. */ + EXPORT_SYMBOL(generic_file_write_nolock); ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, --- linux-2.6.18.2/mm/filemap.c 2006-09-20 16:58:44 +0200 +++ linux-2.6.18.2-vs2.1.1/mm/filemap.c 2006-10-25 03:39:09 +0200 @@ -2399,6 +2621,21 @@ ssize_t generic_file_write(struct file * } EXPORT_SYMBOL(generic_file_write); +static ssize_t generic_kernel_file_write(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + /* Wraps the kernel buffer @buf in a single kvec and calls the _nolock variant with the inode's i_mutex held. 
*/ struct inode *inode = file->f_mapping->host; + ssize_t err; + struct kvec local_iov = { .iov_base = (char *) buf, + .iov_len = count }; + + mutex_lock(&inode->i_mutex); + err = generic_kernel_file_write_nolock(file, &local_iov, 1, ppos); + mutex_unlock(&inode->i_mutex); + + return err; +} + ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) {