--- linux-2.6.22.9/fs/namei.c 2007-07-09 13:19:27 +0200 +++ linux-2.6.22.9-vs2.3.0.27.7/fs/namei.c 2007-10-19 23:38:00 +0200 @@ -31,6 +31,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -224,6 +231,30 @@ int generic_permission(struct inode *ino return -EACCES; } +static inline int dx_barrier(struct inode *inode) +{ + if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return 1; + } + return 0; +} + +static inline int dx_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (dx_barrier(inode)) + return -EACCES; + if (dx_notagcheck(nd) || + dx_check(inode->i_tag, DX_HOSTID|DX_ADMIN|DX_WATCH|DX_IDENT)) + return 0; + + vxwprintk(1, "tag=%d denied access to %p[#%d,%lu] »%s«.", + dx_current_tag(), inode, inode->i_tag, inode->i_ino, + vxd_cond_path(nd)); + return -EACCES; +} + int permission(struct inode *inode, int mask, struct nameidata *nd) { umode_t mode = inode->i_mode; @@ -234,14 +265,14 @@ int permission(struct inode *inode, int /* * Nobody gets write access to a read-only fs. */ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /* * Nobody gets write access to an immutable file. */ - if (IS_IMMUTABLE(inode)) + if (IS_IMMUTABLE(inode) && !IS_COW(inode)) return -EACCES; } @@ -257,6 +288,12 @@ int permission(struct inode *inode, int /* Ordinary permission routines do not understand MAY_APPEND. 
*/ submask = mask & ~MAY_APPEND; + + if ((inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC) && + (inode->i_sb->s_magic != PROC_SUPER_MAGIC) && + (retval = dx_permission(inode, mask, nd))) + return retval; + if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -432,6 +469,8 @@ static int exec_permission_lite(struct i { umode_t mode = inode->i_mode; + if (dx_barrier(inode)) + return -EACCES; if (inode->i_op && inode->i_op->permission) return -EAGAIN; @@ -732,7 +771,8 @@ static __always_inline void follow_dotdo if (nd->dentry == fs->root && nd->mnt == fs->rootmnt) { read_unlock(&fs->lock); - break; + /* for sane '/' avoid follow_mount() */ + return; } read_unlock(&fs->lock); spin_lock(&dcache_lock); @@ -769,16 +809,40 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } else if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) { + if (!vx_check((xid_t)inode->i_tag, VS_WATCH_P | VS_IDENT)) + goto hidden; + } else { + if (!dx_notagcheck(nd) && !dx_check(inode->i_tag, + DX_WATCH | DX_ADMIN | DX_HOSTID | DX_IDENT)) + goto hidden; + } done: path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; +hidden: + vxwprintk(1, "tag=%d did lookup hidden %p[#%d,%lu] »%s«.", + dx_current_tag(), inode, inode->i_tag, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -1399,7 +1463,8 @@ static inline int check_sticky(struct in * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). 
*/ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, + int isdir, struct nameidata *nd) { int error; @@ -1409,13 +1474,13 @@ static int may_delete(struct inode *dir, BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim->d_inode, dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = permission(dir,MAY_WRITE | MAY_EXEC, nd); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1546,6 +1611,14 @@ int may_open(struct nameidata *nd, int a if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; +#ifdef CONFIG_VSERVER_COWBL + if (IS_COW(inode) && (flag & FMODE_WRITE)) { + if (IS_COW_LINK(inode)) + return -EMLINK; + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + mark_inode_dirty(inode); + } +#endif error = vfs_permission(nd, acc_mode); if (error) return error; @@ -1562,7 +1635,8 @@ int may_open(struct nameidata *nd, int a return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) + } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + && (flag & FMODE_WRITE)) return -EROFS; /* * An append-only file must be opened in append mode for writing. 
@@ -1650,6 +1724,11 @@ int open_namei(int dfd, const char *path struct dentry *dir; int count = 0; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; +restart: +#endif acc_mode = ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ @@ -1743,6 +1822,22 @@ do_last: goto exit; ok: error = may_open(nd, acc_mode, flag); +#ifdef CONFIG_VSERVER_COWBL + if (error == -EMLINK) { + struct dentry *dentry; + dentry = cow_break_link(pathname); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto exit; + } + dput(dentry); + release_open_intent(nd); + path_release(nd); + flag = rflag; + mode = rmode; + goto restart; + } +#endif if (error) goto exit; return 0; @@ -1854,16 +1949,25 @@ fail: } EXPORT_SYMBOL_GPL(lookup_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int vfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if (!(S_ISCHR(mode) || S_ISBLK(mode))) + goto okay; + + if (!capable(CAP_MKNOD)) return -EPERM; + if (S_ISCHR(mode) && !vs_chrdev_perm(dev, DATTR_CREATE)) + return -EPERM; + if (S_ISBLK(mode) && !vs_blkdev_perm(dev, DATTR_CREATE)) + return -EPERM; +okay: if (!dir->i_op || !dir->i_op->mknod) return -EPERM; @@ -1906,11 +2010,12 @@ asmlinkage long sys_mknodat(int dfd, con error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, - new_decode_dev(dev)); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + new_decode_dev(dev), &nd); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + 0, &nd); break; case S_IFDIR: error = -EPERM; @@ -1933,9 +2038,10 @@ asmlinkage long sys_mknod(const 
char __u return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int vfs_mkdir(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1977,7 +2083,7 @@ asmlinkage long sys_mkdirat(int dfd, con if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.dentry->d_inode, dentry, mode, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2020,9 +2126,10 @@ void dentry_unhash(struct dentry *dentry spin_unlock(&dcache_lock); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(dir, dentry, 1, nd); if (error) return error; @@ -2084,7 +2191,7 @@ static long do_rmdir(int dfd, const char error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); dput(dentry); exit2: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2100,9 +2207,10 @@ asmlinkage long sys_rmdir(const char __u return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +int vfs_unlink(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 0); + int error = may_delete(dir, dentry, 0, nd); if (error) return error; @@ -2164,7 +2272,7 @@ static long do_unlinkat(int dfd, const c inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); exit2: dput(dentry); } @@ -2199,9 +2307,10 @@ asmlinkage long sys_unlink(const char __ return do_unlinkat(AT_FDCWD, 
pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -2245,7 +2354,7 @@ asmlinkage long sys_symlinkat(const char if (IS_ERR(dentry)) goto out_unlock; - error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2262,7 +2371,8 @@ asmlinkage long sys_symlink(const char _ return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +int vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, struct nameidata *nd) { struct inode *inode = old_dentry->d_inode; int error; @@ -2270,7 +2380,7 @@ int vfs_link(struct dentry *old_dentry, if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry, nd); if (error) return error; @@ -2280,7 +2390,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. 
*/
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+	if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
 		return -EPERM;
 	if (!dir->i_op || !dir->i_op->link)
 		return -EPERM;
@@ -2340,7 +2450,7 @@ asmlinkage long sys_linkat(int olddfd, c
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto out_unlock;
-	error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+	error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry, &nd);
 	dput(new_dentry);
 out_unlock:
 	mutex_unlock(&nd.dentry->d_inode->i_mutex);
@@ -2472,14 +2582,14 @@ int vfs_rename(struct inode *old_dir, st
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
 
-	error = may_delete(old_dir, old_dentry, is_dir);
+	error = may_delete(old_dir, old_dentry, is_dir, NULL);
 	if (error)
 		return error;
 
 	if (!new_dentry->d_inode)
 		error = may_create(new_dir, new_dentry, NULL);
 	else
-		error = may_delete(new_dir, new_dentry, is_dir);
+		error = may_delete(new_dir, new_dentry, is_dir, NULL);
 	if (error)
 		return error;
 
@@ -2557,6 +2667,9 @@ static int do_rename(int olddfd, const c
 	error = -EINVAL;
 	if (old_dentry == trap)
 		goto exit4;
+	error = -EROFS;
+	if (MNT_IS_RDONLY(newnd.mnt))
+		goto exit4;
 	new_dentry = lookup_hash(&newnd);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
@@ -2650,6 +2763,240 @@ int vfs_follow_link(struct nameidata *nd
 	return __vfs_follow_link(nd, link);
 }
 
+
+#ifdef CONFIG_VSERVER_COWBL
+
+#include
+
+/* Splice up to len bytes from the start of in into out. */
+static inline
+long do_cow_splice(struct file *in, struct file *out, size_t len)
+{
+	loff_t ppos = 0;
+
+	return do_splice_direct(in, &ppos, out, len, 0);
+}
+
+/*
+ * NOTE(review): the block below has no function header in this copy of
+ * the patch and the #include above has no file name; both look lost in
+ * transit -- restore them from the upstream patch before applying.
+ */
+{
+	printk("%p: %p[»%.*s«:%d:%d] %02x %02x %02x %02x\n",
+		current, de, de->d_name.len, de->d_name.name,
+		de->d_name.len, d_unhashed(de),
+		de->d_name.name[0], de->d_name.name[1],
+		de->d_name.name[2], de->d_name.name[3]);
+}
+
+/*
+ * cow_break_link - replace a COW-linked file by a private copy
+ * @pathname: path of the file to break out
+ *
+ * Creates a temporary file next to the original (same path plus one pad
+ * byte), splices the contents over, copies uid/gid and finally renames
+ * the copy over the original.
+ *
+ * Returns the copy's dentry with an extra reference on success, or an
+ * ERR_PTR().  Once the temporary file exists, any failure unlinks it.
+ */
+struct dentry *cow_break_link(const char *pathname)
+{
+	int ret, mode, pathlen;
+	struct nameidata old_nd, dir_nd;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *res, *dir;
+	struct vfsmount *old_mnt, *new_mnt;
+	struct file *old_file;
+	struct file *new_file;
+	char *to, *path, pad='\251';
+	loff_t size;
+
+	vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
+	path = kmalloc(PATH_MAX, GFP_KERNEL);
+	res = ERR_PTR(-ENOMEM);
+	if (!path)
+		goto out;
+
+	/* old_nd will have refs to dentry and mnt */
+	ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
+	vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
+	res = ERR_PTR(ret);
+	if (ret < 0)
+		goto out_free_path;
+
+	old_dentry = old_nd.dentry;
+	old_mnt = old_nd.mnt;
+	mode = old_dentry->d_inode->i_mode;
+
+	/* two bytes are held back for the pad byte and terminator below */
+	to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
+	/* d_path() can hand back an ERR_PTR(), never strlen() that */
+	if (IS_ERR(to)) {
+		res = (void *) to;
+		goto out_rel_old;
+	}
+	pathlen = strlen(to);
+	vxdprintk(VXD_CBIT(misc, 2), "old path »%s« [»%.*s«:%d]", to,
+		old_dentry->d_name.len, old_dentry->d_name.name,
+		old_dentry->d_name.len);
+
+	to[pathlen+1] = 0;
+retry:
+	/* try the next pad byte; give up with -EMLINK once they run out */
+	to[pathlen] = pad--;
+	res = ERR_PTR(-EMLINK);
+	if (pad <= '\240')
+		goto out_rel_old;
+
+	vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
+	/* dir_nd will have refs to dentry and mnt */
+	ret = path_lookup(to,
+		LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"path_lookup(new): %d", ret);
+	if (ret < 0)
+		goto retry;
+
+	/* this puppy downs the inode mutex */
+	new_dentry = lookup_create(&dir_nd, 0);
+	if (!new_dentry || IS_ERR(new_dentry)) {
+		vxdprintk(VXD_CBIT(misc, 2),
+			"lookup_create(new): %p", new_dentry);
+		/* the mutex stays held on failure, so drop it here */
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		path_release(&dir_nd);
+		goto retry;
+	}
+	vxdprintk(VXD_CBIT(misc, 2),
+		"lookup_create(new): %p [»%.*s«:%d]", new_dentry,
+		new_dentry->d_name.len, new_dentry->d_name.name,
+		new_dentry->d_name.len);
+	dir = dir_nd.dentry;
+
+	ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"vfs_create(new): %d", ret);
+	if (ret == -EEXIST) {
+		mutex_unlock(&dir->d_inode->i_mutex);
+		dput(new_dentry);
+		path_release(&dir_nd);
+		goto retry;
+	}
+	else if (ret < 0) {
+		res = ERR_PTR(ret);
+		goto out_unlock_new;
+	}
+
+	/* drop out early */
+	if (d_unhashed(old_dentry))
+		goto out_unlock_new;
+
+	new_mnt = dir_nd.mnt;
+
+	dget(old_dentry);
+	mntget(old_mnt);
+	/* this one cleans up the dentry/mnt in case of failure */
+	old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(old): %p", old_file);
+	if (!old_file || IS_ERR(old_file)) {
+		res = IS_ERR(old_file) ? (void *) old_file : res;
+		/* mutex, new_dentry and the temp file still need cleanup */
+		goto out_unlock_new;
+	}
+
+	dget(new_dentry);
+	mntget(new_mnt);
+	/* this one cleans up the dentry/mnt in case of failure */
+	new_file = dentry_open(new_dentry, new_mnt, O_WRONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(new): %p", new_file);
+	if (!new_file || IS_ERR(new_file)) {
+		res = IS_ERR(new_file) ? (void *) new_file : res;
+		goto out_fput_old;
+	}
+
+	size = i_size_read(old_file->f_dentry->d_inode);
+	ret = do_cow_splice(old_file, new_file, size);
+	vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
+	if (ret < 0) {
+		res = ERR_PTR(ret);
+		goto out_fput_both;
+	} else if (ret < size) {
+		res = ERR_PTR(-ENOSPC);
+		goto out_fput_both;
+	} else {
+		struct inode *old_inode = old_dentry->d_inode;
+		struct inode *new_inode = new_dentry->d_inode;
+		struct iattr attr = {
+			.ia_uid = old_inode->i_uid,
+			.ia_gid = old_inode->i_gid,
+			.ia_valid = ATTR_UID | ATTR_GID
+			};
+
+		ret = inode_setattr(new_inode, &attr);
+		if (ret) {
+			res = ERR_PTR(ret);
+			goto out_fput_both;
+		}
+	}
+
+	mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
+	ret = -ENOENT;
+	if (d_unhashed(old_dentry))
+		goto out_unlock;
+
+	vxdprintk(VXD_CBIT(misc, 2),
+		"vfs_rename: [»%.*s«:%d] -> [»%.*s«:%d]",
+		new_dentry->d_name.len, new_dentry->d_name.name,
+		new_dentry->d_name.len,
+		old_dentry->d_name.len, old_dentry->d_name.name,
+		old_dentry->d_name.len);
+	ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
+		old_nd.dentry->d_parent->d_inode, old_dentry);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
+
+out_unlock:
+	mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
+
+	if (!ret) {
+		res = new_dentry;
+		dget(new_dentry);
+	}
+	else
+		res = ERR_PTR(ret);
+
+out_fput_both:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(new_file=%p[#%d])", new_file,
+		atomic_read(&new_file->f_count));
+	fput(new_file);
+
+out_fput_old:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(old_file=%p[#%d])", old_file,
+		atomic_read(&old_file->f_count));
+	fput(old_file);
+
+out_unlock_new:
+	/* unlink the temp file while the directory mutex is still held */
+	if (IS_ERR(res))
+		vfs_unlink(dir->d_inode, new_dentry, &dir_nd);
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(new_dentry);
+	path_release(&dir_nd);
+out_rel_old:
+	path_release(&old_nd);
+out_free_path:
+	kfree(path);
+out:
+	return res;
+}
+
+#endif
+
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {