--- linux-2.6.14/fs/namei.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namei.c 2005-10-30 04:29:36 +0100 @@ -654,7 +677,8 @@ static inline void follow_dotdot(struct if (nd->dentry == current->fs->root && nd->mnt == current->fs->rootmnt) { read_unlock(&current->fs->lock); - break; + /* for sane '/' avoid follow_mount() */ + return; } read_unlock(&current->fs->lock); spin_lock(&dcache_lock); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -161,6 +163,7 @@ clone_mnt(struct vfsmount *old, struct d mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; + mnt->mnt_xid = old->mnt_xid; /* stick the duplicate mount on the same expiry list * as the original if that was on one */ --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -215,6 +218,32 @@ static inline void mangle(struct seq_fil seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if (mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -238,10 +268,20 @@ static int show_vfsmnt(struct seq_file * }; struct proc_fs_info *fs_infop; - mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt)) + return 0; + + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + } mangle(m, mnt->mnt_sb->s_type->name); seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -252,6 +292,8 @@ static int show_vfsmnt(struct seq_file * if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } + if (mnt->mnt_flags & MNT_XID) + seq_printf(m, ",xid=%d", mnt->mnt_xid); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -337,19 +379,12 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -static void umount_tree(struct vfsmount *mnt) +static inline void __umount_list(struct list_head *kill) { - struct vfsmount *p; - LIST_HEAD(kill); - - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_list); - list_add(&p->mnt_list, &kill); - p->mnt_namespace = NULL; - } + struct vfsmount *mnt; - while (!list_empty(&kill)) { - mnt = list_entry(kill.next, struct vfsmount, mnt_list); + while (!list_empty(kill)) { + mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -365,6 
+400,34 @@ static void umount_tree(struct vfsmount } } +void umount_tree(struct vfsmount *mnt) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; + } + __umount_list(&kill); +} + +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + if (p == fs->rootmnt || p == fs->pwdmnt) + continue; + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; + } + __umount_list(&kill); +} + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -481,7 +544,7 @@ asmlinkage long sys_umount(char __user * goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -508,6 +571,8 @@ static int mount_is_safe(struct nameidat { if (capable(CAP_SYS_ADMIN)) return 0; + if (vx_ccaps(VXC_SECURE_MOUNT)) + return 0; return -EPERM; #ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -617,11 +682,12 @@ out_unlock: /* * do loopback mount. 
*/ -static int do_loopback(struct nameidata *nd, char *old_name, int recurse) +static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, int flags) { struct nameidata old_nd; struct vfsmount *mnt = NULL; int err = mount_is_safe(nd); + int recurse = flags & MS_REC; if (err) return err; if (!old_name || !*old_name) --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -646,6 +712,10 @@ static int do_loopback(struct nameidata list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); + if (flags & MS_XID) { + mnt->mnt_xid = xid; + mnt->mnt_flags |= MNT_XID; + } err = graft_tree(mnt, nd); if (err) { spin_lock(&vfsmount_lock); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -667,12 +737,12 @@ static int do_loopback(struct nameidata */ static int do_remount(struct nameidata *nd, int flags, int mnt_flags, - void *data) + void *data, xid_t xid) { int err; struct super_block * sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -681,10 +751,15 @@ static int do_remount(struct nameidata * if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; + if (vx_ccaps(VXC_SECURE_REMOUNT)) + mnt_flags |= MNT_NODEV; down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data, 0); - if (!err) + if (!err) { nd->mnt->mnt_flags=mnt_flags; + if (flags & MS_XID) + nd->mnt->mnt_xid = xid; + } up_write(&sb->s_umount); if (!err) security_sb_post_remount(nd->mnt, flags, data); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -696,7 +771,7 @@ static int do_move_mount(struct nameidat struct nameidata old_nd, 
parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -768,7 +843,7 @@ static int do_new_mount(struct nameidata return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -1017,6 +1092,7 @@ long do_mount(char * dev_name, char * di struct nameidata nd; int retval = 0; int mnt_flags = 0; + xid_t xid = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -1032,6 +1108,14 @@ long do_mount(char * dev_name, char * di if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + retval = vx_parse_xid(data_page, &xid, 1); + if (retval) { + mnt_flags |= MNT_XID; + /* bind and re-mounts get xid flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_XID; + } + /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -1041,6 +1125,9 @@ long do_mount(char * dev_name, char * di mnt_flags |= MNT_NOEXEC; flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + if (vx_ccaps(VXC_SECURE_MOUNT)) + mnt_flags |= MNT_NODEV; + /* ... 
and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); if (retval) --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -1052,9 +1139,9 @@ long do_mount(char * dev_name, char * di if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, xid); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&nd, dev_name, xid, flags); else if (flags & MS_MOVE) retval = do_move_mount(&nd, dev_name); else --- linux-2.6.14/fs/namespace.c 2005-10-28 20:49:44 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/namespace.c 2005-10-30 04:29:36 +0100 @@ -1081,7 +1168,7 @@ int copy_namespace(int flags, struct tas if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) { put_namespace(namespace); return -EPERM; } --- linux-2.6.14/fs/super.c 2005-08-29 22:25:33 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/super.c 2005-10-30 04:29:36 +0100 @@ -804,7 +806,7 @@ struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); + struct super_block *sb; struct vfsmount *mnt; int error; char *secdata = NULL; --- linux-2.6.14/fs/super.c 2005-08-29 22:25:33 +0200 +++ linux-2.6.14-vs2.0.1-pre3/fs/super.c 2005-10-30 04:29:36 +0100 @@ -812,6 +814,12 @@ do_kern_mount(const char *fstype, int fl if (!type) return ERR_PTR(-ENODEV); + sb = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_BINARY_MOUNT)) + goto out; + + sb = ERR_PTR(-ENOMEM); mnt = alloc_vfsmnt(name); if (!mnt) goto out; --- linux-2.6.14/include/linux/namespace.h 2005-08-29 22:25:42 +0200 +++ linux-2.6.14-vs2.0.1-pre3/include/linux/namespace.h 2005-10-29 03:19:02 +0200 @@ -14,6 +14,7 @@ struct 
namespace { extern int copy_namespace(int, struct task_struct *); extern void __put_namespace(struct namespace *namespace); +extern void umount_unused(struct vfsmount *, struct fs_struct *); static inline void put_namespace(struct namespace *namespace) { --- linux-2.6.14/include/linux/vserver/namespace.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.14-vs2.0.1-pre3/include/linux/vserver/namespace.h 2005-10-29 03:19:02 +0200 @@ -0,0 +1,14 @@ +#ifndef _VX_NAMESPACE_H +#define _VX_NAMESPACE_H + +#include + +struct vx_info; +struct namespace; +struct fs_struct; + +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *); + +#else /* _VX_NAMESPACE_H */ +#warning duplicate inclusion +#endif /* _VX_NAMESPACE_H */ --- linux-2.6.14/kernel/vserver/namespace.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.14-vs2.0.1-pre3/kernel/vserver/namespace.c 2005-10-30 04:29:36 +0100 @@ -0,0 +1,124 @@ +/* + * linux/kernel/vserver/namespace.c + * + * Virtual Server: Context Namespace Support + * + * Copyright (C) 2003-2005 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* namespace functions */ + +#include + +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs) +{ + struct fs_struct *fs_copy; + + if (vxi->vx_namespace) + return -EPERM; + if (!ns || !fs) + return -EINVAL; + + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + return -ENOMEM; + + get_namespace(ns); + vxi->vx_namespace = ns; + vxi->vx_fs = fs_copy; + return 0; +} + +int vc_enter_namespace(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct fs_struct *old_fs, *fs; + struct namespace *old_ns; + int ret = 0; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + + vxi = locate_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = -EINVAL; + if (!vxi->vx_namespace) + goto out_put; + + ret = -ENOMEM; 
+ fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + goto out_put; + + ret = 0; + task_lock(current); + old_ns = current->namespace; + old_fs = current->fs; + get_namespace(vxi->vx_namespace); + current->namespace = vxi->vx_namespace; + current->fs = fs; + task_unlock(current); + + put_namespace(old_ns); + put_fs_struct(old_fs); +out_put: + put_vx_info(vxi); + return ret; +} + +int vc_cleanup_namespace(uint32_t id, void *data) +{ + down_write(&current->namespace->sem); + spin_lock(&vfsmount_lock); + umount_unused(current->namespace->root, current->fs); + spin_unlock(&vfsmount_lock); + up_write(&current->namespace->sem); + return 0; +} + +int vc_set_namespace(uint32_t id, void __user *data) +{ + struct fs_struct *fs; + struct namespace *ns; + struct vx_info *vxi; + int ret; + + if (vx_check(0, VX_ADMIN|VX_WATCH)) + return -ENOSYS; + + task_lock(current); + vxi = get_vx_info(current->vx_info); + fs = current->fs; + atomic_inc(&fs->count); + ns = current->namespace; + get_namespace(current->namespace); + task_unlock(current); + + ret = vx_set_namespace(vxi, ns, fs); + + put_namespace(ns); + put_fs_struct(fs); + put_vx_info(vxi); + return ret; +} +