-- ./drivers/infiniband/core/addr.c ++ ./drivers/infiniband/core/addr.c @@ -252,7 +252,7 @@ static int addr6_resolve(struct sockaddr if (ipv6_addr_any(&fl.fl6_src)) { ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src); + &fl.fl6_dst, 0, &fl.fl6_src, NULL); if (ret) goto put; -- ./init/Kconfig ++ ./init/Kconfig @@ -591,6 +592,7 @@ config CGROUP_DEBUG config CGROUP_NS bool "Namespace cgroup subsystem" + default n help Provides a simple namespace cgroup subsystem to provide hierarchical naming of sets of namespaces, -- ./security/commoncap.c ++ ./security/commoncap.c @@ -27,6 +27,7 @@ #include #include #include +// #include /* * If a non-root user executes a setuid-root binary in @@ -52,7 +53,7 @@ static void warn_setuid_and_fcaps_mixed( int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current_cap(); + NETLINK_CB(skb).eff_cap = vx_mbcaps(current_cap()); return 0; } @@ -82,7 +84,22 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, int audit) { - return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; + struct vx_info *vxi = tsk->vx_info; + +#if 0 + printk("cap_capable() VXF_STATE_SETUP = %llx, raised = %x, eff = %08x:%08x\n", + vx_info_flags(vxi, VXF_STATE_SETUP, 0), + cap_raised(tsk->cap_effective, cap), + tsk->cap_effective.cap[1], tsk->cap_effective.cap[0]); +#endif + + /* special case SETUP */ + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) && + /* FIXME: maybe use cred instead? */ + cap_raised(tsk->cred->cap_effective, cap)) + return 0; + + return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM; } /** -- ./block/genhd.c ++ ./block/genhd.c @@ -1154,17 +1154,17 @@ static int diskstats_show(struct seq_fil cpu = part_stat_lock(); part_round_stats(cpu, hd); part_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu " - "%u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %lu " + "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[0]), part_stat_read(hd, merges[0]), - (unsigned long long)part_stat_read(hd, sectors[0]), + part_stat_read(hd, sectors[0]), jiffies_to_msecs(part_stat_read(hd, ticks[0])), part_stat_read(hd, ios[1]), part_stat_read(hd, merges[1]), - (unsigned long long)part_stat_read(hd, sectors[1]), + part_stat_read(hd, sectors[1]), jiffies_to_msecs(part_stat_read(hd, ticks[1])), part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), -- ./ipc/namespace.c ++ ./ipc/namespace.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "util.h" -- ./kernel/pid_namespace.c ++ ./kernel/pid_namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include #define BITS_PER_PAGE (PAGE_SIZE*8) -- ./kernel/sched.c ++ ./kernel/sched.c @@ -3759,6 +3772,7 @@ void account_system_time(struct task_str cputime_t cputime, cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ cputime64_t tmp; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { -- ./kernel/fork.c ++ ./kernel/fork.c @@ -678,6 +686,7 @@ struct mm_struct *dup_mm(struct task_str goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; /* Initializing for Swap token stuff */ mm->token_priority = 0; -- ./kernel/nsproxy.c ++ ./kernel/nsproxy.c @@ -56,41 +61,52 @@ static inline struct nsproxy *create_nsp * Return the newly created nsproxy. Do not attach this to the task, * leave it to the caller to do proper locking and attach it to task. */ -static struct nsproxy *create_new_namespaces(unsigned long flags, - struct task_struct *tsk, struct fs_struct *new_fs) +static struct nsproxy *unshare_namespaces(unsigned long flags, + struct nsproxy *orig, struct fs_struct *new_fs) { struct nsproxy *new_nsp; int err; + vxdprintk(VXD_CBIT(space, 4), + "unshare_namespaces(0x%08lx,%p,%p)", + flags, orig, new_fs); + new_nsp = create_nsproxy(); if (!new_nsp) return ERR_PTR(-ENOMEM); - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); + new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs); if (IS_ERR(new_nsp->mnt_ns)) { err = PTR_ERR(new_nsp->mnt_ns); goto out_ns; } - new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); + new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns); if (IS_ERR(new_nsp->uts_ns)) { err = PTR_ERR(new_nsp->uts_ns); goto out_uts; } - new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); + new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns); if (IS_ERR(new_nsp->ipc_ns)) { err = PTR_ERR(new_nsp->ipc_ns); goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); + new_nsp->pid_ns = copy_pid_ns(flags, orig->pid_ns); if (IS_ERR(new_nsp->pid_ns)) { err = PTR_ERR(new_nsp->pid_ns); goto out_pid; } - new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); + /* disabled now? + new_nsp->user_ns = copy_user_ns(flags, orig->user_ns); + if (IS_ERR(new_nsp->user_ns)) { + err = PTR_ERR(new_nsp->user_ns); + goto out_user; + } */ + + new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); goto out_net; -- ./kernel/sys.c ++ ./kernel/sys.c @@ -1197,7 +1210,7 @@ SYSCALL_DEFINE2(sethostname, char __user int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1246,7 +1259,7 @@ SYSCALL_DEFINE2(setdomainname, char __us int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1415,7 +1428,7 @@ static int check_prlimit_permission(stru cred->gid != tcred->egid || cred->gid != tcred->sgid || cred->gid != tcred->gid) && - !capable(CAP_SYS_RESOURCE)) { + !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) { return -EPERM; } -- ./kernel/capability.c ++ ./kernel/capability.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* -- ./kernel/utsname.c ++ ./kernel/utsname.c @@ -14,14 +14,17 @@ #include #include #include +#include static struct uts_namespace *create_uts_ns(void) { struct uts_namespace *uts_ns; uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); - if (uts_ns) + if (uts_ns) { kref_init(&uts_ns->kref); + atomic_inc(&vs_global_uts_ns); + } return uts_ns; } @@ -71,5 +74,6 @@ void free_uts_ns(struct kref *kref) struct uts_namespace *ns; ns = container_of(kref, struct uts_namespace, kref); + atomic_dec(&vs_global_uts_ns); kfree(ns); } -- ./mm/page_alloc.c ++ ./mm/page_alloc.c @@ -53,6 +53,8 @@ #include #include #include +#include +#include #include #include -- ./include/linux/fs.h ++ ./include/linux/fs.h @@ -237,6 +240,14 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_IXUNLINK 4096 /* Immutable Invert on unlink */ + +/* Linux-VServer related Inode flags */ + +#define V_VALID 1 +#define V_XATTR 2 +#define V_BARRIER 4 /* Barrier for chroot() */ +#define V_COW 8 /* Copy on Write */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -360,11 +384,14 @@ struct inodes_stat_t { #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ -#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define FS_COW_FL 0x20000000 /* Copy on Write marker */ +#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 -- ./include/net/route.h ++ ./include/net/route.h @@ -211,6 +214,9 @@ static inline char rt_tos2priority(u8 to return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +extern int ip_v4_find_src(struct net *net, struct nx_info *, + struct rtable **, struct flowi *); + static inline int ip_route_connect(struct rtable **rp, __be32 dst, __be32 src, u32 tos, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk, @@ -226,11 +232,24 @@ static inline int ip_route_connect(struc .fl_ip_dport = dport }; int err; struct net *net = sock_net(sk); + struct nx_info *nx_info = current_nx_info(); if (inet_sk(sk)->transparent) fl.flags |= FLOWI_FLAG_ANYSRC; - if (!dst || !src) { + if (sk) + nx_info = sk->sk_nx_info; + + vxdprintk(VXD_CBIT(net, 4), + "ip_route_connect(%p) %p,%p;%lx", + sk, nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + err = ip_v4_find_src(net, nx_info, rp, &fl); + if (err) + return err; + + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(net, rp, &fl); if (err) return err; -- ./net/ipv6/route.c ++ ./net/ipv6/route.c @@ -2290,7 +2290,8 @@ static int rt6_fill_node(struct net *net struct inet6_dev *idev = ip6_dst_idev(&rt->dst); struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - dst, 0, &saddr_buf) == 0) + dst, 0, &saddr_buf, + (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } -- ./net/ipv6/ip6_output.c ++ ./net/ipv6/ip6_output.c @@ -933,7 +933,7 @@ static int ip6_dst_lookup_tail(struct so err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, &fl->fl6_dst, sk ? inet6_sk(sk)->srcprefs : 0, - &fl->fl6_src); + &fl->fl6_src, sk->sk_nx_info); if (err) goto out_err_release; } -- ./net/netfilter/ipvs/ip_vs_xmit.c ++ ./net/netfilter/ipvs/ip_vs_xmit.c @@ -213,7 +213,7 @@ __ip_vs_route_output_v6(struct net *net, return dst; if (ipv6_addr_any(&fl.fl6_src) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src) < 0) + &fl.fl6_dst, 0, &fl.fl6_src, NULL) < 0) goto out_err; if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) goto out_err; -- ./net/socket.c ++ ./net/socket.c @@ -551,7 +555,7 @@ static inline int __sock_sendmsg(struct struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; sock_update_classid(sock->sk); -- ./net/ipv4/udp.c ++ ./net/ipv4/udp.c @@ -898,8 +903,13 @@ int udp_sendmsg(struct kiocb *iocb, stru .fl_ip_sport = inet->inet_sport, .fl_ip_dport = dport }; struct net *net = sock_net(sk); + struct nx_info *nxi = sk->sk_nx_info; security_sk_classify_flow(sk, &fl); + err = ip_v4_find_src(net, nxi, &rt, &fl); + if (err) + goto out; + err = ip_route_output_flow(net, &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) @@ -2091,7 +2106,10 @@ static void udp4_format_sock(struct sock seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", - bucket, src, srcp, dest, destp, sp->sk_state, + bucket, + nx_map_sock_lback(current_nx_info(), src), srcp, + nx_map_sock_lback(current_nx_info(), dest), destp, + sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), -- ./net/ipv4/tcp_ipv4.c ++ ./net/ipv4/tcp_ipv4.c @@ -2427,7 +2454,10 @@ static void get_tcp4_sock(struct sock *s seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", - i, src, srcp, dest, destp, sk->sk_state, + i, + nx_map_sock_lback(current_nx_info(), src), srcp, + nx_map_sock_lback(current_nx_info(), dest), destp, + sk->sk_state, tp->write_seq - tp->snd_una, rx_queue, timer_active, @@ -2462,7 +2492,10 @@ static void get_timewait4_sock(struct in seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n", - i, src, srcp, dest, destp, tw->tw_substate, 0, 0, + i, + nx_map_sock_lback(current_nx_info(), src), srcp, + nx_map_sock_lback(current_nx_info(), dest), destp, + tw->tw_substate, 0, 0, 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw, len); } -- ./net/ipv4/raw.c ++ ./net/ipv4/raw.c @@ -564,6 +570,13 @@ static int raw_sendmsg(struct kiocb *ioc } security_sk_classify_flow(sk, &fl); + if (sk->sk_nx_info) { + err = ip_v4_find_src(sock_net(sk), + sk->sk_nx_info, &rt, &fl); + + if (err) + goto done; + } err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); } if (err) @@ -980,7 +998,10 @@ static void raw_sock_seq_show(struct seq seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", - i, src, srcp, dest, destp, sp->sk_state, + i, + nx_map_sock_lback(current_nx_info(), src), srcp, + nx_map_sock_lback(current_nx_info(), dest), destp, + sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), -- ./net/sctp/ipv6.c ++ ./net/sctp/ipv6.c @@ -306,7 +306,8 @@ static void sctp_v6_get_saddr(struct sct dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, inet6_sk(&sk->inet.sk)->srcprefs, - &saddr->v6.sin6_addr); + &saddr->v6.sin6_addr, + asoc->base.sk->sk_nx_info); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n", &saddr->v6.sin6_addr); return; -- ./net/sunrpc/clnt.c ++ ./net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include -- ./arch/m68k/kernel/traps.c ++ ./arch/m68k/kernel/traps.c @@ -906,8 +906,8 @@ void show_registers(struct pt_regs *regs printk("d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", regs->d4, regs->d5, regs->a0, regs->a1); - printk("Process %s (pid: %d, task=%p)\n", - current->comm, task_pid_nr(current), current); + printk("Process %s (pid: %d[#%u], task=%p)\n", + current->comm, task_pid_nr(current), current->xid, current); addr = (unsigned long)&fp->un; printk("Frame format=%X ", regs->format); switch (regs->format) { -- ./arch/m68k/kernel/ptrace.c ++ ./arch/m68k/kernel/ptrace.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -255,6 +256,8 @@ long arch_ptrace(struct task_struct *chi ret = ptrace_request(child, request, addr, data); break; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT)) + goto out_tsk; return ret; out_eio: -- ./arch/ia64/mm/fault.c ++ ./arch/ia64/mm/fault.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include -- ./fs/xfs/linux-2.6/xfs_super.c ++ ./fs/xfs/linux-2.6/xfs_super.c @@ -112,6 +112,9 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ +#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */ +#define MNTOPT_TAGGED "tag" /* context tagging for inodes */ +#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */ /* * Table driven mount option parser. @@ -369,6 +376,19 @@ xfs_parseargs( } else if (!strcmp(this_char, "irixsgid")) { cmn_err(CE_WARN, "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); +#ifndef CONFIG_TAGGING_NONE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + mp->m_flags |= XFS_MOUNT_TAGGED; + } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) { + mp->m_flags &= ~XFS_MOUNT_TAGGED; + } else if (!strcmp(this_char, MNTOPT_TAGXID)) { + mp->m_flags |= XFS_MOUNT_TAGGED; +#endif +#ifdef CONFIG_PROPAGATE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + /* use value */ + mp->m_flags |= XFS_MOUNT_TAGGED; +#endif } else { cmn_err(CE_WARN, "XFS: unknown mount option [%s].", this_char); -- ./fs/ext4/inode.c ++ ./fs/ext4/inode.c @@ -4446,7 +4447,7 @@ static void ext4_free_branches(handle_t int ext4_can_truncate(struct inode *inode) { - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return 0; if (S_ISREG(inode->i_mode)) return 1; -- ./fs/namespace.c ++ ./fs/namespace.c @@ -1000,10 +1033,20 @@ static int show_vfsmnt(struct seq_file * int err = 0; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; - mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, &mnt_path, " \t\n\\"); - seq_putc(m, ' '); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return SEQ_SKIP; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return SEQ_SKIP; + + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->root.mnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, &mnt_path, " \t\n\\"); + seq_putc(m, ' '); + } show_type(m, mnt->mnt_sb); seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); err = show_sb_opts(m, mnt->mnt_sb); @@ -1097,17 +1145,27 @@ static int show_vfsstat(struct seq_file struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; int err = 0; - /* device */ - if (mnt->mnt_devname) { - seq_puts(m, "device "); - mangle(m, mnt->mnt_devname); - } else - seq_puts(m, "no device"); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return SEQ_SKIP; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return SEQ_SKIP; - /* mount point */ - seq_puts(m, " mounted on "); - seq_path(m, &mnt_path, " \t\n\\"); - seq_putc(m, ' '); + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->root.mnt) { + seq_puts(m, "device /dev/root mounted on / "); + } else { + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, &mnt_path, " \t\n\\"); + seq_putc(m, ' '); + } /* file system type */ seq_puts(m, "with fstype "); @@ -1747,11 +1805,13 @@ static int do_change_type(struct path *p * do loopback mount. */ static int do_loopback(struct path *path, char *old_name, - int recurse) + tag_t tag, unsigned long flags, int mnt_flags) { struct path old_path; struct vfsmount *mnt = NULL; int err = mount_is_safe(path); + int recurse = flags & MS_REC; + if (err) return err; if (!old_name || !*old_name) @@ -2581,9 +2652,10 @@ SYSCALL_DEFINE2(pivot_root, const char _ down_write(&namespace_sem); mutex_lock(&old.dentry->d_inode->i_mutex); error = -EINVAL; - if (IS_MNT_SHARED(old.mnt) || + if ((IS_MNT_SHARED(old.mnt) || IS_MNT_SHARED(new.mnt->mnt_parent) || - IS_MNT_SHARED(root.mnt->mnt_parent)) + IS_MNT_SHARED(root.mnt->mnt_parent)) && + !vx_flags(VXF_STATE_SETUP, 0)) goto out2; if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) goto out2; -- ./fs/dcache.c ++ ./fs/dcache.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "internal.h" /* @@ -1287,6 +1294,7 @@ struct dentry *d_alloc(struct dentry * p dname[name->len] = 0; dentry->d_count = 1; + vx_dentry_inc(dentry); dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); -- ./fs/super.c ++ ./fs/super.c @@ -31,6 +31,9 @@ #include #include #include +#include +#include +#include #include "internal.h" @@ -964,6 +967,7 @@ struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; + struct super_block *sb; struct dentry *root; char *secdata = NULL; int error; @@ -971,6 +975,11 @@ vfs_kern_mount(struct file_system_type * if (!type) return ERR_PTR(-ENODEV); + error = -EPERM; + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) + goto out; + error = -ENOMEM; mnt = alloc_vfsmnt(name); if (!mnt) @@ -1002,12 +1011,20 @@ vfs_kern_mount(struct file_system_type * if (error < 0) goto out_free_secdata; } - BUG_ON(!mnt->mnt_sb); - WARN_ON(!mnt->mnt_sb->s_bdi); + + sb = mnt->mnt_sb; + BUG_ON(!sb); + WARN_ON(!sb->s_bdi); WARN_ON(mnt->mnt_sb->s_bdi == &default_backing_dev_info); mnt->mnt_sb->s_flags |= MS_BORN; - error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); + error = -EPERM; + if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + + error = security_sb_kern_mount(sb, flags, secdata); if (error) goto out_sb; -- ./fs/namei.c ++ ./fs/namei.c @@ -1130,7 +1224,7 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; - struct inode *dir; + struct inode *dir, *d_inode; int err; /* @@ -1191,6 +1285,13 @@ found: if (IS_ERR(dentry)) goto fail; } + + d_inode = dentry->d_inode; + if (!d_inode) + goto done; + + if (__dx_permission(d_inode, MAY_ACCESS)) + goto hidden; done: path->mnt = mnt; path->dentry = dentry; @@ -1202,6 +1303,18 @@ done: *inode = path->dentry->d_inode; return 0; +hidden: +#ifndef CONFIG_VSERVER_WARN_DEVPTS + if (d_inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC) +#endif + vxwprintk_task(1, + "did lookup hidden %s:%p[#%d,%lu] " VS_Q("%s/%.*s") ".", + d_inode->i_sb->s_id, d_inode, d_inode->i_tag, d_inode->i_ino, + vxd_path(&nd->path), name->len, name->name); + + dput(dentry); + return -ENOENT; + need_lookup: dir = parent->d_inode; BUG_ON(nd->inode != dir); @@ -2063,7 +2185,8 @@ static int open_will_truncate(int flag, } static struct file *finish_open(struct nameidata *nd, - int open_flag, int acc_mode) + int open_flag, int acc_mode, + const char *pathname) { struct file *filp; int will_truncate; @@ -2076,6 +2199,23 @@ static struct file *finish_open(struct n goto exit; } error = may_open(&nd->path, acc_mode, open_flag); +#ifdef CONFIG_VSERVER_COWBL + if (error == -EMLINK) { + struct dentry *dentry; + dentry = cow_break_link(pathname); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto exit_cow; + } + dput(dentry); + if (will_truncate) + mnt_drop_write(nd->path.mnt); + release_open_intent(nd); + path_put(&nd->path); + return ERR_PTR(-EMLINK); + } +exit_cow: +#endif if (error) { if (will_truncate) mnt_drop_write(nd->path.mnt); @@ -2223,7 +2363,7 @@ static struct file *do_last(struct namei if (S_ISDIR(nd->inode->i_mode)) goto exit; ok: - filp = finish_open(nd, open_flag, acc_mode); + filp = finish_open(nd, open_flag, acc_mode, pathname); return filp; exit_mutex_unlock: @@ -2250,7 +2390,12 @@ struct file *do_filp_open(int dfd, const int count = 0; int flag = open_to_namei_flags(open_flag); int flags; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; +restart: +#endif if (!(open_flag & O_CREAT)) mode = 0; @@ -2316,7 +2461,7 @@ struct file *do_filp_open(int dfd, const goto out_path2; } audit_inode(pathname, nd.path.dentry); - filp = finish_open(&nd, open_flag, acc_mode); + filp = finish_open(&nd, open_flag, acc_mode, pathname); out2: release_open_intent(&nd); return filp; @@ -2358,6 +2503,13 @@ reval: */ nd.flags = flags; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); +#ifdef CONFIG_VSERVER_COWBL + if (unlikely(IS_ERR(filp) && PTR_ERR(filp) == -EMLINK)) { + flag = rflag; + mode = rmode; + goto restart; + } +#endif while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; @@ -2392,6 +2544,13 @@ reval: } nd.flags &= ~LOOKUP_PARENT; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); +#ifdef CONFIG_VSERVER_COWBL + if (unlikely(IS_ERR(filp) && PTR_ERR(filp) == -EMLINK)) { + flag = rflag; + mode = rmode; + goto restart; + } +#endif if (linki->i_op->put_link) linki->i_op->put_link(link.dentry, &nd, cookie); path_put(&link); @@ -2490,9 +2649,17 @@ int vfs_mknod(struct inode *dir, struct if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if (!(S_ISCHR(mode) || S_ISBLK(mode))) + goto okay; + + if (!capable(CAP_MKNOD)) return -EPERM; + if (S_ISCHR(mode) && !vs_chrdev_perm(dev, DATTR_CREATE)) + return -EPERM; + if (S_ISBLK(mode) && !vs_blkdev_perm(dev, DATTR_CREATE)) + return -EPERM; +okay: if (!dir->i_op->mknod) return -EPERM; -- ./fs/xattr.c ++ ./fs/xattr.c @@ -49,7 +50,7 @@ xattr_permission(struct inode *inode, co * The trusted.* namespace can only be accessed by a privileged user. */ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + return (vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED) ? 0 : -EPERM); /* In user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and -- ./fs/inode.c ++ ./fs/inode.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * This is needed for the following functions: -- ./fs/btrfs/super.c ++ ./fs/btrfs/super.c @@ -155,8 +155,8 @@ enum { Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, - Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_err, + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, + Opt_enospc_debug, Opt_tag, Opt_notag, Opt_tagid, Opt_err, }; static match_table_t tokens = { @@ -186,6 +186,9 @@ static match_table_t tokens = { {Opt_clear_cache, "clear_cache"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_enospc_debug, "enospc_debug"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, {Opt_err, NULL}, }; -- ./fs/btrfs/ioctl.c ++ ./fs/btrfs/ioctl.c @@ -162,6 +239,7 @@ static int btrfs_ioctl_setflags(struct f if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; + /* maybe add FS_IXUNLINK_FL ? */ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL)) -- ./fs/btrfs/inode.c ++ ./fs/btrfs/inode.c @@ -37,6 +37,8 @@ #include #include #include +#include + #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -2614,8 +2623,15 @@ static void fill_inode_item(struct btrfs struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(leaf, item, inode->i_uid); - btrfs_set_inode_gid(leaf, item, inode->i_gid); + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + + btrfs_set_inode_uid(leaf, item, uid); + btrfs_set_inode_gid(leaf, item, gid); +#ifdef CONFIG_TAGGING_INTERN + btrfs_set_inode_tag(leaf, item, inode->i_tag); +#endif + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_mode(leaf, item, inode->i_mode); btrfs_set_inode_nlink(leaf, item, inode->i_nlink); -- ./fs/btrfs/ctree.h ++ ./fs/btrfs/ctree.h @@ -2613,6 +2623,7 @@ extern const struct dentry_operations bt long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); +int btrfs_sync_flags(struct inode *inode, int, int); /* file.c */ int btrfs_sync_file(struct file *file, int datasync); -- ./fs/ocfs2/file.c ++ ./fs/ocfs2/file.c @@ -1126,13 +1126,15 @@ int ocfs2_setattr(struct dentry *dentry, mlog(0, "uid change: %d\n", attr->ia_uid); if (attr->ia_valid & ATTR_GID) mlog(0, "gid change: %d\n", attr->ia_gid); + if (attr->ia_valid & ATTR_TAG) + mlog(0, "tag change: %d\n", attr->ia_tag); if (attr->ia_valid & ATTR_SIZE) mlog(0, "size change...\n"); if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) mlog(0, "time change...\n"); #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ - | ATTR_GID | ATTR_UID | ATTR_MODE) + | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE) if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); return 0; -- ./fs/proc/base.c ++ ./fs/proc/base.c @@ -83,6 +83,8 @@ #include #include #include +#include +#include #include "internal.h" /* NOTE: -- ./Makefile ++ ./Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 38 -EXTRAVERSION = .8 +EXTRAVERSION = .8-vs2.3.0.37-rc17 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION*