diff -NurpP --minimal linux-2.6.19/Documentation/vserver/debug.txt linux-2.6.19-vs2.1.x-t1/Documentation/vserver/debug.txt --- linux-2.6.19/Documentation/vserver/debug.txt 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/Documentation/vserver/debug.txt 2006-11-08 04:57:48 +0100 @@ -0,0 +1,154 @@ + +debug_cvirt: + + 2 4 "vx_map_tgid: %p/%llx: %d -> %d" + "vx_rmap_tgid: %p/%llx: %d -> %d" + +debug_dlim: + + 0 1 "ALLOC (%p,#%d)%c inode (%d)" + "FREE (%p,#%d)%c inode" + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" + "FREE (%p,#%d)%c %lld bytes" + 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]" + 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d" + "ext3_has_free_blocks(%p): free=%lu, root=%lu" + "rcu_free_dl_info(%p)" + 4 10 "alloc_dl_info(%p,%d) = %p" + "dealloc_dl_info(%p)" + "get_dl_info(%p[#%d.%d])" + "put_dl_info(%p[#%d.%d])" + 5 20 "alloc_dl_info(%p,%d)*" + 6 40 "__hash_dl_info: %p[#%d]" + "__unhash_dl_info: %p[#%d]" + 7 80 "locate_dl_info(%p,#%d) = %p" + +debug_misc: + + 0 1 "destroy_dqhash: %p [#0x%08x] c=%d" + "new_dqhash: %p [#0x%08x]" + "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]" + "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]" + "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]" + "vroot_get_real_bdev not set" + 1 2 "cow_break_link(»%s«)" + "temp copy »%s«" + 2 4 "dentry_open(new): %p" + "dentry_open(old): %p" + "lookup_create(new): %p" + "old path »%s«" + "path_lookup(old): %d" + "vfs_create(new): %d" + "vfs_rename: %d" + "vfs_sendfile: %d" + 3 8 "fput(new_file=%p[#%d])" + "fput(old_file=%p[#%d])" + 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d" + "vx_info_kill(%p[#%d],%d,%d)*" + 5 20 "vs_reboot(%p[#%d],%d)" + 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]" + +debug_net: + + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" + 4 10 "ip_route_connect(%p) %p,%p;%lx" + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" + 6 40 "sk,egf: %p [#%d] (from %d)" + "sk,egn: %p [#%d] (from 
%d)" + "sk,req: %p [#%d] (from %d)" + "sk: %p [#%d] (from %d)" + "tw: %p [#%d] (from %d)" + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" + +debug_nid: + + 0 1 "__lookup_nx_info(#%u): %p[#%u]" + "alloc_nx_info(%d) = %p" + "create_nx_info(%d) (dynamic rejected)" + "create_nx_info(%d) = %p (already there)" + "create_nx_info(%d) = %p (new)" + "dealloc_nx_info(%p)" + 1 2 "alloc_nx_info(%d)*" + "create_nx_info(%d)*" + 2 4 "get_nx_info(%p[#%d.%d])" + "put_nx_info(%p[#%d.%d])" + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" + "clr_nx_info(%p[#%d.%d])" + "init_nx_info(%p[#%d.%d])" + "release_nx_info(%p[#%d.%d.%d]) %p" + "set_nx_info(%p[#%d.%d])" + 4 10 "__hash_nx_info: %p[#%d]" + "__nx_dynamic_id: [#%d]" + "__unhash_nx_info: %p[#%d]" + 5 20 "moved task %p into nxi:%p[#%d]" + "nx_migrate_task(%p,%p[#%d.%d.%d])" + "task_get_nx_info(%p)" + 6 40 "nx_clear_persistent(%p[#%d])" + +debug_quota: + + 0 1 "quota_sync_dqh(%p,%d) discard inode %p" + 1 2 "quota_sync_dqh(%p,%d)" + "sync_dquots(%p,%d)" + "sync_dquots_dqh(%p,%d)" + 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)" + +debug_switch: + + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]" + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]" + 4 10 "%s: (%s %s) returned %s with %d" + +debug_tag: + + 7 80 "dx_parse_tag(»%s«): %d:#%d" + "dx_propagate_tag(%p[#%lu.%d]): %d,%d" + +debug_xid: + + 0 1 "__lookup_vx_info(#%u): %p[#%u]" + "alloc_vx_info(%d) = %p" + "alloc_vx_info(%d)*" + "create_vx_info(%d) (dynamic rejected)" + "create_vx_info(%d) = %p (already there)" + "create_vx_info(%d) = %p (new)" + "dealloc_vx_info(%p)" + "loc_vx_info(%d) = %p (found)" + "loc_vx_info(%d) = %p (new)" + "loc_vx_info(%d) = %p (not available)" + 1 2 "create_vx_info(%d)*" + "loc_vx_info(%d)*" + 2 4 "get_vx_info(%p[#%d.%d])" + "put_vx_info(%p[#%d.%d])" + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" + "clr_vx_info(%p[#%d.%d])" + "init_vx_info(%p[#%d.%d])" + "release_vx_info(%p[#%d.%d.%d]) %p" + "set_vx_info(%p[#%d.%d])" + 
"vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]" + 4 10 "__hash_vx_info: %p[#%d]" + "__unhash_vx_info: %p[#%d]" + "__vx_dynamic_id: [#%d]" + 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]" + "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]" + "moved task %p into vxi:%p[#%d]" + "task_get_vx_info(%p)" + "vx_migrate_task(%p,%p[#%d.%d])" + 6 40 "vx_clear_persistent(%p[#%d])" + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_persistent(%p[#%d])" + "vx_set_reaper(%p[#%d],%p[#%d,%d])" + + +debug_limit: + + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" diff -NurpP --minimal linux-2.6.19/Makefile linux-2.6.19-vs2.1.x-t1/Makefile --- linux-2.6.19/Makefile 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/Makefile 2006-11-30 21:22:22 +0100 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 19 -EXTRAVERSION = +EXTRAVERSION = -vs2.1.x-t1 NAME=Avast! A bilge rat! 
# *DOCUMENTATION* diff -NurpP --minimal linux-2.6.19/arch/alpha/Kconfig linux-2.6.19-vs2.1.x-t1/arch/alpha/Kconfig --- linux-2.6.19/arch/alpha/Kconfig 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/Kconfig 2006-11-08 04:57:40 +0100 @@ -632,6 +632,8 @@ source "arch/alpha/oprofile/Kconfig" source "arch/alpha/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/asm-offsets.c linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/asm-offsets.c --- linux-2.6.19/arch/alpha/kernel/asm-offsets.c 2006-02-15 13:54:10 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -36,6 +36,7 @@ void foo(void) DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(SIGCHLD, SIGCHLD); BLANK(); diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/entry.S linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/entry.S --- linux-2.6.19/arch/alpha/kernel/entry.S 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/entry.S 2006-11-08 04:57:52 +0100 @@ -644,7 +644,7 @@ kernel_thread: stq $2, 152($sp) /* HAE */ /* Shuffle FLAGS to the front; add CLONE_VM. */ - ldi $1, CLONE_VM|CLONE_UNTRACED + ldi $1, CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; or $18, $1, $16 bsr $26, sys_clone @@ -873,24 +873,15 @@ sys_getxgid: .globl sys_getxpid .ent sys_getxpid sys_getxpid: + lda $sp, -16($sp) + stq $26, 0($sp) .prologue 0 - ldq $2, TI_TASK($8) - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. 
*/ - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - ldl $0, TASK_TGID($2) -1: ldl $1, TASK_TGID($4) -#ifdef CONFIG_SMP - mov $4, $5 - mb - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - cmpeq $4, $5, $5 - beq $5, 1b -#endif - stq $1, 80($sp) + lda $16, 96($sp) + jsr $26, do_getxpid + ldq $26, 0($sp) + + lda $sp, 16($sp) ret .end sys_getxpid diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/osf_sys.c linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/osf_sys.c --- linux-2.6.19/arch/alpha/kernel/osf_sys.c 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/osf_sys.c 2006-11-08 04:57:44 +0100 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -443,6 +444,7 @@ osf_getdomainname(char __user *name, int { unsigned len; int i; + char *domainname; if (!access_ok(VERIFY_WRITE, name, namelen)) return -EFAULT; @@ -885,7 +887,7 @@ osf_gettimeofday(struct timeval32 __user { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/ptrace.c --- linux-2.6.19/arch/alpha/kernel/ptrace.c 2006-04-09 13:49:39 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,11 @@ do_sys_ptrace(long request, long pid, lo goto out_notsk; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + ret = -EPERM; + goto out; + } + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/semaphore.c linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/semaphore.c --- linux-2.6.19/arch/alpha/kernel/semaphore.c 2004-08-14 12:55:32 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/semaphore.c 2006-11-08 04:57:42 +0100 @@ -68,8 +68,8 @@ __down_failed(struct semaphore *sem) 
DECLARE_WAITQUEUE(wait, tsk); #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down failed(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down failed(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif tsk->state = TASK_UNINTERRUPTIBLE; @@ -97,8 +97,8 @@ __down_failed(struct semaphore *sem) wake_up(&sem->wait); #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down acquired(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down acquired(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif } @@ -110,8 +110,8 @@ __down_failed_interruptible(struct semap long ret = 0; #ifdef CONFIG_DEBUG_SEMAPHORE - printk("%s(%d): down failed(%p)\n", - tsk->comm, tsk->pid, sem); + printk("%s(%d:#%u): down failed(%p)\n", + tsk->comm, tsk->pid, tsk->xid, sem); #endif tsk->state = TASK_INTERRUPTIBLE; diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/systbls.S linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/systbls.S --- linux-2.6.19/arch/alpha/kernel/systbls.S 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/systbls.S 2006-11-08 04:57:41 +0100 @@ -446,7 +446,7 @@ sys_call_table: .quad sys_stat64 /* 425 */ .quad sys_lstat64 .quad sys_fstat64 - .quad sys_ni_syscall /* sys_vserver */ + .quad sys_vserver /* sys_vserver */ .quad sys_ni_syscall /* sys_mbind */ .quad sys_ni_syscall /* sys_get_mempolicy */ .quad sys_ni_syscall /* sys_set_mempolicy */ diff -NurpP --minimal linux-2.6.19/arch/alpha/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/traps.c --- linux-2.6.19/arch/alpha/kernel/traps.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -182,7 +182,8 @@ die_if_kernel(char * str, struct pt_regs #ifdef CONFIG_SMP printk("CPU %d ", hard_smp_processor_id()); #endif - printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err); + printk("%s(%d[#%u]): %s %ld\n", current->comm, + current->pid, current->xid, str, err); dik_show_regs(regs, r9_15); dik_show_trace((unsigned long 
*)(regs+1)); dik_show_code((unsigned int *)regs->pc); diff -NurpP --minimal linux-2.6.19/arch/alpha/mm/init.c linux-2.6.19-vs2.1.x-t1/arch/alpha/mm/init.c --- linux-2.6.19/arch/alpha/mm/init.c 2006-11-30 21:18:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/alpha/mm/init.c 2006-11-08 04:57:39 +0100 @@ -20,6 +20,7 @@ #include #include /* max_low_pfn */ #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/arm/Kconfig linux-2.6.19-vs2.1.x-t1/arch/arm/Kconfig --- linux-2.6.19/arch/arm/Kconfig 2006-11-30 21:18:24 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/arm/Kconfig 2006-11-30 20:55:45 +0100 @@ -935,6 +935,8 @@ source "arch/arm/oprofile/Kconfig" source "arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/arm/kernel/calls.S linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/calls.S --- linux-2.6.19/arch/arm/kernel/calls.S 2006-02-18 14:39:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/calls.S 2006-11-08 04:57:41 +0100 @@ -322,7 +322,7 @@ /* 310 */ CALL(sys_request_key) CALL(sys_keyctl) CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) -/* vserver */ CALL(sys_ni_syscall) + CALL(sys_vserver) CALL(sys_ioprio_set) /* 315 */ CALL(sys_ioprio_get) CALL(sys_inotify_init) diff -NurpP --minimal linux-2.6.19/arch/arm/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/process.c --- linux-2.6.19/arch/arm/kernel/process.c 2006-11-30 21:18:24 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -246,7 +246,8 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("Pid: %d[#%u], comm: %20s\n", + current->pid, current->xid, current->comm); __show_regs(regs); __backtrace(); } @@ -469,7 +470,8 @@ pid_t kernel_thread(int (*fn)(void *), v regs.ARM_pc = (unsigned long)kernel_thread_helper; regs.ARM_cpsr = SVC_MODE; - return 
do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19/arch/arm/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/traps.c --- linux-2.6.19/arch/arm/kernel/traps.c 2006-11-30 21:18:24 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/arm/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -205,8 +205,8 @@ static void __die(const char *str, int e printk("Internal error: %s: %x [#%d]\n", str, err, ++die_counter); print_modules(); __show_regs(regs); - printk("Process %s (pid: %d, stack limit = 0x%p)\n", - tsk->comm, tsk->pid, thread + 1); + printk("Process %s (pid: %d:#%u, stack limit = 0x%p)\n", + tsk->comm, tsk->pid, tsk->xid, thread + 1); if (!user_mode(regs) || in_interrupt()) { dump_mem("Stack: ", regs->ARM_sp, diff -NurpP --minimal linux-2.6.19/arch/arm26/Kconfig linux-2.6.19-vs2.1.x-t1/arch/arm26/Kconfig --- linux-2.6.19/arch/arm26/Kconfig 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/arm26/Kconfig 2006-11-08 04:57:40 +0100 @@ -234,6 +234,8 @@ source "drivers/usb/Kconfig" source "arch/arm26/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/arm26/kernel/calls.S linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/calls.S --- linux-2.6.19/arch/arm26/kernel/calls.S 2005-03-02 12:38:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/calls.S 2006-11-08 04:57:41 +0100 @@ -257,6 +257,11 @@ __syscall_start: .long sys_lremovexattr .long sys_fremovexattr .long sys_tkill + + .rept 313 - (. 
- __syscall_start) / 4 + .long sys_ni_syscall + .endr + .long sys_vserver /* 313 */ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 diff -NurpP --minimal linux-2.6.19/arch/arm26/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/process.c --- linux-2.6.19/arch/arm26/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -365,7 +365,8 @@ pid_t kernel_thread(int (*fn)(void *), v regs.ARM_r3 = (unsigned long)do_exit; regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19/arch/arm26/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/traps.c --- linux-2.6.19/arch/arm26/kernel/traps.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/arm26/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -185,8 +185,9 @@ NORET_TYPE void die(const char *str, str printk("Internal error: %s: %x\n", str, err); printk("CPU: %d\n", smp_processor_id()); show_regs(regs); - printk("Process %s (pid: %d, stack limit = 0x%p)\n", - current->comm, current->pid, end_of_stack(tsk)); + printk("Process %s (pid: %d[#%u], stack limit = 0x%p)\n", + current->comm, current->pid, + current->xid, end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { __dump_stack(tsk, (unsigned long)(regs + 1)); diff -NurpP --minimal linux-2.6.19/arch/cris/Kconfig linux-2.6.19-vs2.1.x-t1/arch/cris/Kconfig --- linux-2.6.19/arch/cris/Kconfig 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/cris/Kconfig 2006-11-08 04:57:40 +0100 @@ -185,6 +185,8 @@ source "drivers/usb/Kconfig" source "arch/cris/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/cris/arch-v10/kernel/process.c 
linux-2.6.19-vs2.1.x-t1/arch/cris/arch-v10/kernel/process.c --- linux-2.6.19/arch/cris/arch-v10/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/cris/arch-v10/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -103,7 +103,8 @@ int kernel_thread(int (*fn)(void *), voi regs.dccr = 1 << I_DCCR_BITNR; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* setup the child's kernel stack with a pt_regs and switch_stack on it. diff -NurpP --minimal linux-2.6.19/arch/cris/arch-v32/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/cris/arch-v32/kernel/process.c --- linux-2.6.19/arch/cris/arch-v32/kernel/process.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/cris/arch-v32/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -120,7 +120,8 @@ kernel_thread(int (*fn)(void *), void * regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT); /* Create the new process. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19/arch/cris/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/cris/kernel/irq.c --- linux-2.6.19/arch/cris/kernel/irq.c 2006-09-20 16:57:57 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/cris/kernel/irq.c 2006-11-30 18:26:05 +0100 @@ -92,6 +92,7 @@ skip: asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { unsigned long sp; + irq_enter(); sp = rdsp(); if (unlikely((sp & (PAGE_SIZE - 1)) < (PAGE_SIZE/8))) { diff -NurpP --minimal linux-2.6.19/arch/frv/kernel/kernel_thread.S linux-2.6.19-vs2.1.x-t1/arch/frv/kernel/kernel_thread.S --- linux-2.6.19/arch/frv/kernel/kernel_thread.S 2005-03-02 12:38:20 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/frv/kernel/kernel_thread.S 2006-11-08 04:57:50 +0100 @@ -13,6 +13,8 @@ #include #define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_KTHREAD 0x10000000 /* kernel thread */ +#define CLONE_KT (CLONE_VM | CLONE_KTHREAD) /* kernel thread flags */ #define KERN_ERR "<3>" .section .rodata @@ -37,7 +39,7 @@ kernel_thread: # start by forking the current process, but with shared VM setlos.p #__NR_clone,gr7 ; syscall number - ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] + ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] setlo #0xe4e4,gr9 setlos.p #0,gr10 ; third syscall arg [parent_tidptr] diff -NurpP --minimal linux-2.6.19/arch/h8300/Kconfig linux-2.6.19-vs2.1.x-t1/arch/h8300/Kconfig --- linux-2.6.19/arch/h8300/Kconfig 2006-06-18 04:51:49 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/h8300/Kconfig 2006-11-08 04:57:40 +0100 @@ -199,6 +199,8 @@ source "fs/Kconfig" source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/h8300/kernel/process.c 
linux-2.6.19-vs2.1.x-t1/arch/h8300/kernel/process.c --- linux-2.6.19/arch/h8300/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/h8300/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -134,7 +134,7 @@ int kernel_thread(int (*fn)(void *), voi fs = get_fs(); set_fs (KERNEL_DS); - clone_arg = flags | CLONE_VM; + clone_arg = flags | CLONE_VM | CLONE_KTHREAD; __asm__("mov.l sp,er3\n\t" "sub.l er2,er2\n\t" "mov.l %2,er1\n\t" diff -NurpP --minimal linux-2.6.19/arch/i386/Kconfig linux-2.6.19-vs2.1.x-t1/arch/i386/Kconfig --- linux-2.6.19/arch/i386/Kconfig 2006-11-30 21:18:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/Kconfig 2006-11-08 04:57:40 +0100 @@ -1153,6 +1153,8 @@ endmenu source "arch/i386/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/i386/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/irq.c --- linux-2.6.19/arch/i386/kernel/irq.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/irq.c 2006-11-30 18:26:37 +0100 @@ -84,7 +84,6 @@ fastcall unsigned int do_IRQ(struct pt_r } } #endif - #ifdef CONFIG_4KSTACKS curctx = (union irq_ctx *) current_thread_info(); @@ -124,7 +123,6 @@ fastcall unsigned int do_IRQ(struct pt_r } else #endif desc->handle_irq(irq, desc); - irq_exit(); set_irq_regs(old_regs); return 1; diff -NurpP --minimal linux-2.6.19/arch/i386/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/process.c --- linux-2.6.19/arch/i386/kernel/process.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/process.c 2006-11-30 20:55:45 +0100 @@ -300,8 +300,10 @@ void show_regs(struct pt_regs * regs) unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d[#%u], comm: %20s\n", + current->pid, current->xid, current->comm); 
+ printk("EIP: %04x:[<%08lx>] CPU: %d\n", + 0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); if (user_mode_vm(regs)) @@ -352,7 +354,8 @@ int kernel_thread(int (*fn)(void *), voi regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19/arch/i386/kernel/sys_i386.c linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/sys_i386.c --- linux-2.6.19/arch/i386/kernel/sys_i386.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/sys_i386.c 2006-11-08 06:12:20 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/i386/kernel/syscall_table.S linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/syscall_table.S --- linux-2.6.19/arch/i386/kernel/syscall_table.S 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/syscall_table.S 2006-11-08 04:57:41 +0100 @@ -272,7 +272,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy diff -NurpP --minimal linux-2.6.19/arch/i386/kernel/sysenter.c linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/sysenter.c --- linux-2.6.19/arch/i386/kernel/sysenter.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/sysenter.c 2006-11-08 04:57:47 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -156,7 +157,7 @@ int arch_setup_additional_pages(struct l current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); - mm->total_vm++; + vx_vmpages_inc(mm); up_fail: up_write(&mm->mmap_sem); return ret; diff 
-NurpP --minimal linux-2.6.19/arch/i386/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/traps.c --- linux-2.6.19/arch/i386/kernel/traps.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/i386/kernel/traps.c 2006-11-30 20:55:45 +0100 @@ -54,6 +54,8 @@ #include #include +#include +#include #include "mach_traps.h" @@ -371,8 +373,8 @@ void show_registers(struct pt_regs *regs regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + printk(KERN_EMERG "Process %.*s (pid: %d[#%u], ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, current->pid, current->xid, current_thread_info(), current, current->thread_info); /* * When in-kernel, we also print out the stack and code at the @@ -461,6 +463,8 @@ void die(const char * str, struct pt_reg oops_enter(); + vxh_throw_oops(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -497,9 +501,9 @@ void die(const char * str, struct pt_reg if (nl) printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) != - NOTIFY_STOP) { + current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { show_registers(regs); + vxh_dump_history(); /* Executive summary in case the oops scrolled away */ esp = (unsigned long) (&regs->esp); savesegment(ss, ss); diff -NurpP --minimal linux-2.6.19/arch/ia64/Kconfig linux-2.6.19-vs2.1.x-t1/arch/ia64/Kconfig --- linux-2.6.19/arch/ia64/Kconfig 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/Kconfig 2006-11-20 21:12:32 +0100 @@ -537,6 +537,8 @@ endmenu source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/ia64/ia32/binfmt_elf32.c linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/binfmt_elf32.c --- 
linux-2.6.19/arch/ia64/ia32/binfmt_elf32.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/binfmt_elf32.c 2006-11-08 04:57:47 +0100 @@ -238,7 +238,8 @@ ia32_setup_arg_pages (struct linux_binpr kmem_cache_free(vm_area_cachep, mpnt); return ret; } - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt)); + current->mm->stack_vm = current->mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff -NurpP --minimal linux-2.6.19/arch/ia64/ia32/ia32_entry.S linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/ia32_entry.S --- linux-2.6.19/arch/ia64/ia32/ia32_entry.S 2006-06-18 04:51:55 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/ia32_entry.S 2006-11-08 04:57:41 +0100 @@ -483,7 +483,7 @@ ia32_syscall_table: data8 sys_tgkill /* 270 */ data8 compat_sys_utimes data8 sys32_fadvise64_64 - data8 sys_ni_syscall + data8 sys32_vserver data8 sys_ni_syscall data8 sys_ni_syscall /* 275 */ data8 sys_ni_syscall diff -NurpP --minimal linux-2.6.19/arch/ia64/ia32/sys_ia32.c linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/sys_ia32.c --- linux-2.6.19/arch/ia64/ia32/sys_ia32.c 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/ia32/sys_ia32.c 2006-11-08 04:57:44 +0100 @@ -1182,7 +1182,7 @@ sys32_gettimeofday (struct compat_timeva { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/ia64/kernel/asm-offsets.c linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/asm-offsets.c --- linux-2.6.19/arch/ia64/kernel/asm-offsets.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -191,6 +191,7 @@ void foo(void) /* for assembly files which can't include sched.h: */ DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); DEFINE(IA64_CLONE_VM, CLONE_VM); + DEFINE(IA64_CLONE_KTHREAD, CLONE_KTHREAD); BLANK(); DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, diff -NurpP 
--minimal linux-2.6.19/arch/ia64/kernel/entry.S linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/entry.S --- linux-2.6.19/arch/ia64/kernel/entry.S 2006-11-30 21:18:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/entry.S 2006-11-08 04:57:41 +0100 @@ -1576,7 +1576,7 @@ sys_call_table: data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_ni_syscall // reserved for kexec_load - data8 sys_ni_syscall // reserved for vserver + data8 sys_vserver data8 sys_waitid // 1270 data8 sys_add_key data8 sys_request_key diff -NurpP --minimal linux-2.6.19/arch/ia64/kernel/perfmon.c linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/perfmon.c --- linux-2.6.19/arch/ia64/kernel/perfmon.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/perfmon.c 2006-11-08 04:57:47 +0100 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -2357,7 +2358,7 @@ pfm_smpl_buffer_alloc(struct task_struct */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, vma_pages(vma)); up_write(&task->mm->mmap_sem); diff -NurpP --minimal linux-2.6.19/arch/ia64/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/process.c --- linux-2.6.19/arch/ia64/kernel/process.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -105,7 +105,8 @@ show_regs (struct pt_regs *regs) unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); + printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", + current->pid, current->xid, smp_processor_id(), current->comm); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); print_symbol("ip is at %s\n", ip); @@ -688,7 +689,8 @@ kernel_thread (int (*fn)(void *), void * regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); 
regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; regs.sw.pr = (1 << PRED_KERNEL_STACK); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs.pt, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19/arch/ia64/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/ptrace.c --- linux-2.6.19/arch/ia64/kernel/ptrace.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1442,6 +1443,9 @@ sys_ptrace (long request, pid_t pid, uns read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; diff -NurpP --minimal linux-2.6.19/arch/ia64/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/traps.c --- linux-2.6.19/arch/ia64/kernel/traps.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -106,8 +106,9 @@ die (const char *str, struct pt_regs *re put_cpu(); if (++die.lock_owner_depth < 3) { - printk("%s[%d]: %s %ld [%d]\n", - current->comm, current->pid, str, err, ++die_counter); + printk("%s[%d[#%u]]: %s %ld [%d]\n", + current->comm, current->pid, current->xid, + str, err, ++die_counter); (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_regs(regs); } else @@ -331,8 +332,9 @@ handle_fpu_swa (int fp_fault, struct pt_ last_time = jiffies; ++fpu_swa_count; printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, current->pid, current->xid, + regs->cr_iip + ia64_psr(regs)->ri, isr); } exception = 
fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, diff -NurpP --minimal linux-2.6.19/arch/ia64/mm/fault.c linux-2.6.19-vs2.1.x-t1/arch/ia64/mm/fault.c --- linux-2.6.19/arch/ia64/mm/fault.c 2006-11-30 21:18:27 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/mm/fault.c 2006-11-08 04:57:40 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/ia64/sn/kernel/xpc_main.c linux-2.6.19-vs2.1.x-t1/arch/ia64/sn/kernel/xpc_main.c --- linux-2.6.19/arch/ia64/sn/kernel/xpc_main.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ia64/sn/kernel/xpc_main.c 2006-11-08 04:57:40 +0100 @@ -108,6 +108,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_min_interval, @@ -121,6 +122,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_hb_check_min_interval, @@ -145,6 +147,7 @@ static ctl_table xpc_sys_xpc_dir[] = { 0644, NULL, &proc_dointvec_minmax, + NULL, &sysctl_intvec, NULL, &xpc_disengage_request_min_timelimit, diff -NurpP --minimal linux-2.6.19/arch/m32r/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/irq.c --- linux-2.6.19/arch/m32r/kernel/irq.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/irq.c 2006-11-30 18:26:50 +0100 @@ -78,6 +78,7 @@ skip: asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) { struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); irq_enter(); diff -NurpP --minimal linux-2.6.19/arch/m32r/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/process.c --- linux-2.6.19/arch/m32r/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -211,8 +211,8 @@ int kernel_thread(int (*fn)(void *), voi regs.psw = M32R_PSW_BIE; /* Ok, create the new process. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, - NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19/arch/m32r/kernel/sys_m32r.c linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/sys_m32r.c --- linux-2.6.19/arch/m32r/kernel/sys_m32r.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/sys_m32r.c 2006-11-08 04:57:40 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/m32r/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/traps.c --- linux-2.6.19/arch/m32r/kernel/traps.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m32r/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -195,8 +195,9 @@ static void show_registers(struct pt_reg } else { printk("SPI: %08lx\n", sp); } - printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", - current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current); + printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)", + current->comm, current->pid, current->xid, + 0xffff & i, 4096+(unsigned long)current); /* * When in-kernel, we also print out the stack and code at the diff -NurpP --minimal linux-2.6.19/arch/m68k/Kconfig linux-2.6.19-vs2.1.x-t1/arch/m68k/Kconfig --- linux-2.6.19/arch/m68k/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m68k/Kconfig 2006-11-08 04:57:40 +0100 @@ -654,6 +654,8 @@ source "fs/Kconfig" source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/m68k/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/process.c --- linux-2.6.19/arch/m68k/kernel/process.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -159,7 +159,8 @@ int kernel_thread(int (*fn)(void *), voi { register long retval __asm__ ("d0"); - register 
long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED; + register long clone_arg __asm__ ("d1") = + flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; retval = __NR_clone; __asm__ __volatile__ diff -NurpP --minimal linux-2.6.19/arch/m68k/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/ptrace.c --- linux-2.6.19/arch/m68k/kernel/ptrace.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -279,6 +280,8 @@ long arch_ptrace(struct task_struct *chi ret = ptrace_request(child, request, addr, data); break; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out_tsk; return ret; out_eio: diff -NurpP --minimal linux-2.6.19/arch/m68k/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/traps.c --- linux-2.6.19/arch/m68k/kernel/traps.c 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m68k/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -899,8 +899,8 @@ void show_registers(struct pt_regs *regs printk("d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", regs->d4, regs->d5, regs->a0, regs->a1); - printk("Process %s (pid: %d, task=%p)\n", - current->comm, current->pid, current); + printk("Process %s (pid: %d[#%u], task=%p)\n", + current->comm, current->pid, current->xid, current); addr = (unsigned long)&fp->un; printk("Frame format=%X ", regs->format); switch (regs->format) { diff -NurpP --minimal linux-2.6.19/arch/m68knommu/Kconfig linux-2.6.19-vs2.1.x-t1/arch/m68knommu/Kconfig --- linux-2.6.19/arch/m68knommu/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/m68knommu/Kconfig 2006-11-08 04:57:40 +0100 @@ -663,6 +663,8 @@ source "fs/Kconfig" source "arch/m68knommu/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/m68knommu/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/m68knommu/kernel/process.c --- 
linux-2.6.19/arch/m68knommu/kernel/process.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/m68knommu/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -122,7 +122,7 @@ void show_regs(struct pt_regs * regs) int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int retval; - long clone_arg = flags | CLONE_VM; + long clone_arg = flags | CLONE_VM | CLONE_KTHREAD; mm_segment_t fs; fs = get_fs(); diff -NurpP --minimal linux-2.6.19/arch/m68knommu/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/m68knommu/kernel/traps.c --- linux-2.6.19/arch/m68knommu/kernel/traps.c 2006-09-20 16:57:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/m68knommu/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -80,8 +80,9 @@ void die_if_kernel(char *str, struct pt_ printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", fp->d4, fp->d5, fp->a0, fp->a1); - printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n", - current->comm, current->pid, PAGE_SIZE+(unsigned long)current); + printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n", + current->comm, current->pid, current->xid, + PAGE_SIZE+(unsigned long)current); show_stack(NULL, (unsigned long *)fp); do_exit(SIGSEGV); } diff -NurpP --minimal linux-2.6.19/arch/mips/Kconfig linux-2.6.19-vs2.1.x-t1/arch/mips/Kconfig --- linux-2.6.19/arch/mips/Kconfig 2006-11-30 21:18:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/Kconfig 2006-11-08 21:52:07 +0100 @@ -2006,6 +2006,8 @@ source "arch/mips/oprofile/Kconfig" source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/linux32.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/linux32.c --- linux-2.6.19/arch/mips/kernel/linux32.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/linux32.c 2006-11-08 04:57:44 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -300,7 +301,7 @@ sys32_gettimeofday(struct compat_timeval { if 
(tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/process.c --- linux-2.6.19/arch/mips/kernel/process.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -271,7 +271,8 @@ long kernel_thread(int (*fn)(void *), vo #endif /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/ptrace.c --- linux-2.6.19/arch/mips/kernel/ptrace.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,9 @@ long arch_ptrace(struct task_struct *chi { int ret; + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) + goto out; + switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/scall32-o32.S linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall32-o32.S --- linux-2.6.19/arch/mips/kernel/scall32-o32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall32-o32.S 2006-11-08 04:57:41 +0100 @@ -619,7 +619,7 @@ einval: li v0, -EINVAL sys sys_mq_timedreceive 5 sys sys_mq_notify 2 /* 4275 */ sys sys_mq_getsetattr 3 - sys sys_ni_syscall 0 /* sys_vserver */ + sys sys_vserver 3 sys sys_waitid 5 sys sys_ni_syscall 0 /* available, was setaltroot */ sys sys_add_key 5 /* 4280 */ diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/scall64-64.S linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-64.S --- linux-2.6.19/arch/mips/kernel/scall64-64.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-64.S 2006-11-08 04:57:41 +0100 @@ -434,7 +434,7 @@ sys_call_table: PTR sys_mq_timedreceive PTR sys_mq_notify PTR sys_mq_getsetattr /* 5235 */ - PTR sys_ni_syscall /* sys_vserver */ + PTR sys_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/scall64-n32.S linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-n32.S --- linux-2.6.19/arch/mips/kernel/scall64-n32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-n32.S 2006-11-08 04:57:41 +0100 @@ -360,7 +360,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* 6240, sys_vserver */ + PTR sys32_vserver /* 6240 */ PTR sysn32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/scall64-o32.S linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-o32.S --- linux-2.6.19/arch/mips/kernel/scall64-o32.S 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/scall64-o32.S 2006-11-08 04:57:41 +0100 @@ -482,7 +482,7 @@ sys_call_table: PTR 
compat_sys_mq_timedreceive PTR compat_sys_mq_notify /* 4275 */ PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* sys_vserver */ + PTR sys32_vserver PTR sys32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/syscall.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/syscall.c --- linux-2.6.19/arch/mips/kernel/syscall.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/syscall.c 2006-11-08 04:57:40 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/sysirix.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/sysirix.c --- linux-2.6.19/arch/mips/kernel/sysirix.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/sysirix.c 2006-11-08 04:57:40 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/mips/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/traps.c --- linux-2.6.19/arch/mips/kernel/traps.c 2006-11-30 21:18:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/mips/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -297,8 +297,9 @@ void show_registers(struct pt_regs *regs { show_regs(regs); print_modules(); - printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n", - current->comm, current->pid, current_thread_info(), current); + printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p)\n", + current->comm, current->pid, current->xid, + current_thread_info(), current); show_stacktrace(current, regs); show_code((unsigned int *) regs->cp0_epc); printk("\n"); diff -NurpP --minimal linux-2.6.19/arch/parisc/Kconfig linux-2.6.19-vs2.1.x-t1/arch/parisc/Kconfig --- linux-2.6.19/arch/parisc/Kconfig 2006-11-30 21:18:30 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/Kconfig 2006-11-08 04:57:40 +0100 @@ -257,6 +257,8 @@ source "arch/parisc/oprofile/Kconfig" source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source 
"security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/parisc/hpux/sys_hpux.c linux-2.6.19-vs2.1.x-t1/arch/parisc/hpux/sys_hpux.c --- linux-2.6.19/arch/parisc/hpux/sys_hpux.c 2006-11-30 21:18:30 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/hpux/sys_hpux.c 2006-11-08 04:57:40 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/parisc/kernel/entry.S linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/entry.S --- linux-2.6.19/arch/parisc/kernel/entry.S 2006-11-30 21:18:30 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/entry.S 2006-11-08 04:57:50 +0100 @@ -761,6 +761,7 @@ fault_vector_11: #define CLONE_VM 0x100 /* Must agree with <linux/sched.h> */ #define CLONE_UNTRACED 0x00800000 +#define CLONE_KTHREAD 0x10000000 .export __kernel_thread, code .import do_fork diff -NurpP --minimal linux-2.6.19/arch/parisc/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/process.c --- linux-2.6.19/arch/parisc/kernel/process.c 2006-11-30 21:18:30 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -173,7 +173,7 @@ pid_t kernel_thread(int (*fn)(void *), v * kernel_thread can become a #define. 
*/ - return __kernel_thread(fn, arg, flags); + return __kernel_thread(fn, arg, flags | CLONE_KTHREAD); } EXPORT_SYMBOL(kernel_thread); diff -NurpP --minimal linux-2.6.19/arch/parisc/kernel/sys_parisc32.c linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/sys_parisc32.c --- linux-2.6.19/arch/parisc/kernel/sys_parisc32.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/sys_parisc32.c 2006-11-08 04:57:44 +0100 @@ -204,11 +204,11 @@ static inline long get_ts32(struct times asmlinkage int sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) { - extern void do_gettimeofday(struct timeval *tv); + extern void vx_gettimeofday(struct timeval *tv); if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_compat_timeval(tv, &ktv)) return -EFAULT; } @@ -612,6 +612,7 @@ asmlinkage int sys32_sysinfo(struct sysi do { seq = read_seqbegin(&xtime_lock); + /* FIXME: requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff -NurpP --minimal linux-2.6.19/arch/parisc/kernel/syscall_table.S linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/syscall_table.S --- linux-2.6.19/arch/parisc/kernel/syscall_table.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/syscall_table.S 2006-11-08 04:57:41 +0100 @@ -368,7 +368,7 @@ ENTRY_COMP(mbind) /* 260 */ ENTRY_COMP(get_mempolicy) ENTRY_COMP(set_mempolicy) - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ + ENTRY_DIFF(vserver) ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ ENTRY_SAME(keyctl) diff -NurpP --minimal linux-2.6.19/arch/parisc/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/traps.c --- linux-2.6.19/arch/parisc/kernel/traps.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -210,8 +210,9 @@ void die_if_kernel(char *str, struct pt_ if (err == 0) return; /* STFU */ - printk(KERN_CRIT "%s (pid %d): %s (code %ld) 
at " RFMT "\n", - current->comm, current->pid, str, err, regs->iaoq[0]); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n", + current->comm, current->pid, current->xid, + str, err, regs->iaoq[0]); #ifdef PRINT_USER_FAULTS /* XXX for debugging only */ show_regs(regs); @@ -242,8 +243,8 @@ void die_if_kernel(char *str, struct pt_ if (!console_drivers) pdc_console_restart(); - printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", - current->comm, current->pid, str, err); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n", + current->comm, current->pid, current->xid, str, err); show_regs(regs); if (in_interrupt()) diff -NurpP --minimal linux-2.6.19/arch/parisc/mm/fault.c linux-2.6.19-vs2.1.x-t1/arch/parisc/mm/fault.c --- linux-2.6.19/arch/parisc/mm/fault.c 2006-06-18 04:52:15 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/parisc/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -213,8 +213,9 @@ bad_area: #ifdef PRINT_USER_FAULTS printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", - tsk->pid, tsk->comm, code, address); + printk(KERN_DEBUG "do_page_fault() pid=%d:#%u " + "command='%s' type=%lu address=0x%08lx\n", + tsk->pid, tsk->xid, tsk->comm, code, address); if (vma) { printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", vma->vm_start, vma->vm_end); diff -NurpP --minimal linux-2.6.19/arch/powerpc/Kconfig linux-2.6.19-vs2.1.x-t1/arch/powerpc/Kconfig --- linux-2.6.19/arch/powerpc/Kconfig 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/Kconfig 2006-11-20 21:12:32 +0100 @@ -1102,6 +1102,8 @@ endmenu source "arch/powerpc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" config KEYS_COMPAT diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/asm-offsets.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/asm-offsets.c --- linux-2.6.19/arch/powerpc/kernel/asm-offsets.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/asm-offsets.c 2006-11-08 04:57:50 
+0100 @@ -243,6 +243,7 @@ int main(void) DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); #ifndef CONFIG_PPC64 DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/irq.c --- linux-2.6.19/arch/powerpc/kernel/irq.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/irq.c 2006-11-30 18:27:23 +0100 @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/misc_32.S linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/misc_32.S --- linux-2.6.19/arch/powerpc/kernel/misc_32.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/misc_32.S 2006-11-08 04:57:50 +0100 @@ -749,7 +749,7 @@ _GLOBAL(kernel_thread) mr r30,r3 /* function */ mr r31,r4 /* argument */ ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,CLONE_UNTRACED>>16 + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/misc_64.S linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/misc_64.S --- linux-2.6.19/arch/powerpc/kernel/misc_64.S 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/misc_64.S 2006-11-08 04:57:50 +0100 @@ -394,7 +394,7 @@ _GLOBAL(kernel_thread) mr r29,r3 mr r30,r4 ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,(CLONE_UNTRACED>>16) + oris r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/process.c --- linux-2.6.19/arch/powerpc/kernel/process.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/process.c 2006-11-08 04:57:42 +0100 @@ -425,8 +425,9 @@ void show_regs(struct pt_regs * regs) trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) printk("DAR: "REG", DSISR: 
"REG"\n", regs->dar, regs->dsisr); - printk("TASK = %p[%d] '%s' THREAD: %p", - current, current->pid, current->comm, task_thread_info(current)); + printk("TASK = %p[%d,#%u] '%s' THREAD: %p", + current, current->pid, current->xid, + current->comm, task_thread_info(current)); #ifdef CONFIG_SMP printk(" CPU: %d", smp_processor_id()); diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/sys_ppc32.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/sys_ppc32.c --- linux-2.6.19/arch/powerpc/kernel/sys_ppc32.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/sys_ppc32.c 2006-11-08 04:57:44 +0100 @@ -275,7 +275,7 @@ asmlinkage long compat_sys_gettimeofday( { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/syscalls.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/syscalls.c --- linux-2.6.19/arch/powerpc/kernel/syscalls.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/syscalls.c 2006-11-08 04:57:40 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/traps.c --- linux-2.6.19/arch/powerpc/kernel/traps.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -888,8 +888,9 @@ void nonrecoverable_exception(struct pt_ void trace_syscall(struct pt_regs *regs) { - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], + printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, current->xid, + regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } diff -NurpP --minimal linux-2.6.19/arch/powerpc/kernel/vdso.c 
linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/vdso.c --- linux-2.6.19/arch/powerpc/kernel/vdso.c 2006-11-30 21:18:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/powerpc/kernel/vdso.c 2006-11-08 04:57:47 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -295,7 +296,7 @@ int arch_setup_additional_pages(struct l /* Put vDSO base into mm struct and account for memory usage */ current->mm->context.vdso_base = vdso_base; - mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + vx_vmpages_add(mm, (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); up_write(&mm->mmap_sem); return 0; diff -NurpP --minimal linux-2.6.19/arch/ppc/Kconfig linux-2.6.19-vs2.1.x-t1/arch/ppc/Kconfig --- linux-2.6.19/arch/ppc/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ppc/Kconfig 2006-11-08 04:57:40 +0100 @@ -1421,6 +1421,8 @@ source "arch/powerpc/oprofile/Kconfig" source "arch/ppc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/ppc/kernel/asm-offsets.c linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/asm-offsets.c --- linux-2.6.19/arch/ppc/kernel/asm-offsets.c 2006-09-20 16:58:01 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/asm-offsets.c 2006-11-08 04:57:50 +0100 @@ -121,6 +121,7 @@ main(void) DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(CLONE_KTHREAD, CLONE_KTHREAD); DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); /* About the CPU features table */ diff -NurpP --minimal linux-2.6.19/arch/ppc/kernel/misc.S linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/misc.S --- linux-2.6.19/arch/ppc/kernel/misc.S 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/misc.S 2006-11-08 04:57:50 +0100 @@ -848,7 +848,7 @@ _GLOBAL(kernel_thread) mr r30,r3 /* function */ mr r31,r4 /* argument */ ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,CLONE_UNTRACED>>16 + oris 
r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16 li r4,0 /* new sp (unused) */ li r0,__NR_clone sc diff -NurpP --minimal linux-2.6.19/arch/ppc/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/traps.c --- linux-2.6.19/arch/ppc/kernel/traps.c 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/ppc/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -748,8 +748,9 @@ void nonrecoverable_exception(struct pt_ void trace_syscall(struct pt_regs *regs) { - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, current->pid, regs->nip, regs->link, regs->gpr[0], + printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, current->xid, + regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } diff -NurpP --minimal linux-2.6.19/arch/s390/Kconfig linux-2.6.19-vs2.1.x-t1/arch/s390/Kconfig --- linux-2.6.19/arch/s390/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/s390/Kconfig 2006-11-08 21:52:08 +0100 @@ -519,6 +519,8 @@ endmenu source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/s390/kernel/compat_linux.c linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/compat_linux.c --- linux-2.6.19/arch/s390/kernel/compat_linux.c 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/compat_linux.c 2006-11-08 04:57:44 +0100 @@ -600,7 +600,7 @@ asmlinkage long sys32_gettimeofday(struc { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/s390/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/process.c --- linux-2.6.19/arch/s390/kernel/process.c 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -165,9 +165,9 @@ void show_regs(struct pt_regs *regs) struct task_struct 
*tsk = current; printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, (void *) tsk, - (void *) tsk->thread.ksp); + printk("Process %s (pid: %d[#%u], task: %p, ksp: %p)\n", + current->comm, current->pid, current->xid, + (void *) tsk, (void *) tsk->thread.ksp); show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ @@ -198,7 +198,7 @@ int kernel_thread(int (*fn)(void *), voi regs.orig_gpr2 = -1; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, 0, &regs, 0, NULL, NULL); } diff -NurpP --minimal linux-2.6.19/arch/s390/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/ptrace.c --- linux-2.6.19/arch/s390/kernel/ptrace.c 2006-06-18 04:52:33 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -723,7 +724,13 @@ sys_ptrace(long request, long pid, long goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + ret = -EPERM; + goto out_tsk; + } + ret = do_ptrace(child, request, addr, data); +out_tsk: put_task_struct(child); out: unlock_kernel(); diff -NurpP --minimal linux-2.6.19/arch/s390/kernel/syscalls.S linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/syscalls.S --- linux-2.6.19/arch/s390/kernel/syscalls.S 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/s390/kernel/syscalls.S 2006-11-08 04:57:41 +0100 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys32_vserver) 
SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff -NurpP --minimal linux-2.6.19/arch/sh/Kconfig linux-2.6.19-vs2.1.x-t1/arch/sh/Kconfig --- linux-2.6.19/arch/sh/Kconfig 2006-11-30 21:18:32 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/Kconfig 2006-11-08 04:57:40 +0100 @@ -627,6 +627,8 @@ source "arch/sh/oprofile/Kconfig" source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/sh/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/irq.c --- linux-2.6.19/arch/sh/kernel/irq.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/irq.c 2006-11-30 18:27:47 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/arch/sh/kernel/kgdb_stub.c linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/kgdb_stub.c --- linux-2.6.19/arch/sh/kernel/kgdb_stub.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/kgdb_stub.c 2006-11-08 04:57:52 +0100 @@ -389,7 +389,7 @@ static struct task_struct *get_thread(in if (pid == PID_MAX) pid = 0; /* First check via PID */ - thread = find_task_by_pid(pid); + thread = find_task_by_real_pid(pid); if (thread) return thread; diff -NurpP --minimal linux-2.6.19/arch/sh/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/process.c --- linux-2.6.19/arch/sh/kernel/process.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -104,7 +104,8 @@ void machine_power_off(void) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid : %d, Comm: %20s\n", current->pid, current->comm); + printk("Pid : %d:#%u, Comm: %20s\n", + current->pid, current->xid, current->comm); print_symbol("PC is at %s\n", instruction_pointer(regs)); printk("PC : %08lx SP : %08lx SR : %08lx 
", regs->pc, regs->regs[15], regs->sr); @@ -164,7 +165,8 @@ int kernel_thread(int (*fn)(void *), voi regs.sr = (1 << 30); /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19/arch/sh/kernel/setup.c linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/setup.c --- linux-2.6.19/arch/sh/kernel/setup.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/setup.c 2006-11-08 04:57:40 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/arch/sh/kernel/sys_sh.c linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/sys_sh.c --- linux-2.6.19/arch/sh/kernel/sys_sh.c 2006-11-30 21:18:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh/kernel/sys_sh.c 2006-11-08 04:57:40 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/arch/sh64/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/sh64/kernel/process.c --- linux-2.6.19/arch/sh64/kernel/process.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -400,8 +400,8 @@ int kernel_thread(int (*fn)(void *), voi regs.pc = (unsigned long)kernel_thread_helper; regs.sr = (1 << 30); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, - &regs, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD, + 0, &regs, 0, NULL, NULL); } /* diff -NurpP --minimal linux-2.6.19/arch/sh64/kernel/sys_sh64.c linux-2.6.19-vs2.1.x-t1/arch/sh64/kernel/sys_sh64.c --- linux-2.6.19/arch/sh64/kernel/sys_sh64.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh64/kernel/sys_sh64.c 2006-11-08 04:57:40 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/arch/sh64/mm/fault.c 
linux-2.6.19-vs2.1.x-t1/arch/sh64/mm/fault.c --- linux-2.6.19/arch/sh64/mm/fault.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sh64/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -82,7 +82,7 @@ static inline void print_vma(struct vm_a static inline void print_task(struct task_struct *tsk) { - printk("Task pid %d\n", tsk->pid); + printk("Task pid %d:#%u\n", tsk->pid, tsk->xid); } static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address) diff -NurpP --minimal linux-2.6.19/arch/sparc/Kconfig linux-2.6.19-vs2.1.x-t1/arch/sparc/Kconfig --- linux-2.6.19/arch/sparc/Kconfig 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/Kconfig 2006-11-08 04:57:40 +0100 @@ -298,6 +298,8 @@ endmenu source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/process.c --- linux-2.6.19/arch/sparc/kernel/process.c 2006-09-20 16:58:01 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -705,7 +705,8 @@ pid_t kernel_thread(int (*fn)(void *), v /* Notreached by child. 
*/ "1: mov %%o0, %0\n\t" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/ptrace.c --- linux-2.6.19/arch/sparc/kernel/ptrace.c 2006-04-09 13:49:44 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -299,6 +300,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/sys_sparc.c linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/sys_sparc.c --- linux-2.6.19/arch/sparc/kernel/sys_sparc.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/sys_sparc.c 2006-11-08 04:57:40 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/sys_sunos.c linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/sys_sunos.c --- linux-2.6.19/arch/sparc/kernel/sys_sunos.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/sys_sunos.c 2006-11-08 04:57:40 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/systbls.S linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/systbls.S --- linux-2.6.19/arch/sparc/kernel/systbls.S 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/systbls.S 2006-11-08 21:52:08 +0100 @@ -71,7 +71,7 @@ sys_call_table: /*250*/ .long sparc_mremap, 
sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.19/arch/sparc/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/traps.c --- linux-2.6.19/arch/sparc/kernel/traps.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc/kernel/traps.c 2006-11-08 04:57:42 +0100 @@ -99,7 +99,8 @@ void die_if_kernel(char *str, struct pt_ " /_| \\__/ |_\\\n" " \\__U_/\n"); - printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); + printk("%s(%d[#%u]): %s [#%d]\n", current->comm, + current->pid, current->xid, str, ++die_counter); show_regs(regs); __SAVE; __SAVE; __SAVE; __SAVE; diff -NurpP --minimal linux-2.6.19/arch/sparc64/Kconfig linux-2.6.19-vs2.1.x-t1/arch/sparc64/Kconfig --- linux-2.6.19/arch/sparc64/Kconfig 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/Kconfig 2006-11-08 04:57:40 +0100 @@ -431,6 +431,8 @@ endmenu source "arch/sparc64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/binfmt_aout32.c --- linux-2.6.19/arch/sparc64/kernel/binfmt_aout32.c 2006-06-18 04:52:34 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/binfmt_aout32.c 2006-11-08 
04:57:40 +0100 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/process.c --- linux-2.6.19/arch/sparc64/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -696,7 +696,8 @@ pid_t kernel_thread(int (*fn)(void *), v /* Notreached by child. */ "1:" : "=r" (retval) : - "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_clone), "r" (flags | + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); return retval; diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/ptrace.c --- linux-2.6.19/arch/sparc64/kernel/ptrace.c 2006-06-18 04:52:35 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/ptrace.c 2006-11-30 18:53:18 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -212,6 +213,10 @@ asmlinkage void do_ptrace(struct pt_regs pt_error_return(regs, -ret); goto out; } + if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/sys_sparc.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sparc.c --- linux-2.6.19/arch/sparc64/kernel/sys_sparc.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sparc.c 2006-11-08 04:57:40 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/sys_sparc32.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sparc32.c --- linux-2.6.19/arch/sparc64/kernel/sys_sparc32.c 2006-11-30 21:18:35 +0100 +++ 
linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sparc32.c 2006-11-08 04:57:44 +0100 @@ -793,7 +793,7 @@ asmlinkage long sys32_gettimeofday(struc { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/sys_sunos32.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sunos32.c --- linux-2.6.19/arch/sparc64/kernel/sys_sunos32.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/sys_sunos32.c 2006-11-08 04:57:40 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/systbls.S linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/systbls.S --- linux-2.6.19/arch/sparc64/kernel/systbls.S 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/systbls.S 2006-11-08 21:52:08 +0100 @@ -72,7 +72,7 @@ sys_call_table32: /*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat @@ -142,7 +142,7 @@ sys_call_table: /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_sync_file_range, sys_clock_settime, 
sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff -NurpP --minimal linux-2.6.19/arch/sparc64/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/traps.c --- linux-2.6.19/arch/sparc64/kernel/traps.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/kernel/traps.c 2006-11-08 21:52:08 +0100 @@ -2223,7 +2223,8 @@ void die_if_kernel(char *str, struct pt_ " /_| \\__/ |_\\\n" " \\__U_/\n"); - printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); + printk("%s(%d[#%u]): %s [#%d]\n", current->comm, + current->pid, current->xid, str, ++die_counter); notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); __asm__ __volatile__("flushw"); __show_regs(regs); diff -NurpP --minimal linux-2.6.19/arch/sparc64/solaris/fs.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/solaris/fs.c --- linux-2.6.19/arch/sparc64/solaris/fs.c 2006-11-30 21:18:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/solaris/fs.c 2006-11-08 04:57:52 +0100 @@ -368,7 +368,7 @@ static int report_statvfs(struct vfsmoun int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if (mnt->mnt_flags & MNT_NOSUID) i |= 2; if (!sysv_valid_dev(inode->i_sb->s_dev)) return -EOVERFLOW; @@ -404,7 +404,7 @@ static int report_statvfs64(struct vfsmo int j = strlen (p); if (j > 15) j = 15; - if (IS_RDONLY(inode)) i = 1; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1; if 
(mnt->mnt_flags & MNT_NOSUID) i |= 2;
 	if (!sysv_valid_dev(inode->i_sb->s_dev))
 		return -EOVERFLOW;
diff -NurpP --minimal linux-2.6.19/arch/sparc64/solaris/misc.c linux-2.6.19-vs2.1.x-t1/arch/sparc64/solaris/misc.c
--- linux-2.6.19/arch/sparc64/solaris/misc.c	2006-11-30 21:18:35 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/sparc64/solaris/misc.c	2006-11-08 04:57:40 +0100
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff -NurpP --minimal linux-2.6.19/arch/um/Kconfig linux-2.6.19-vs2.1.x-t1/arch/um/Kconfig
--- linux-2.6.19/arch/um/Kconfig	2006-11-30 21:18:35 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/um/Kconfig	2006-11-08 04:57:40 +0100
@@ -306,6 +306,8 @@ source "drivers/connector/Kconfig"

 source "fs/Kconfig"

+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"

 source "crypto/Kconfig"
diff -NurpP --minimal linux-2.6.19/arch/um/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/um/kernel/irq.c
--- linux-2.6.19/arch/um/kernel/irq.c	2006-11-30 21:18:36 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/um/kernel/irq.c	2006-11-30 18:28:41 +0100
@@ -357,6 +357,7 @@ void forward_interrupts(int pid)
 unsigned int do_IRQ(int irq, union uml_pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+
 	irq_enter();
 	__do_IRQ(irq);
 	irq_exit();
diff -NurpP --minimal linux-2.6.19/arch/um/kernel/syscall.c linux-2.6.19-vs2.1.x-t1/arch/um/kernel/syscall.c
--- linux-2.6.19/arch/um/kernel/syscall.c	2006-11-30 21:18:36 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/um/kernel/syscall.c	2006-11-08 04:57:44 +0100
@@ -15,6 +15,8 @@
 #include "linux/unistd.h"
 #include "linux/slab.h"
 #include "linux/utime.h"
+#include
+
 #include "asm/mman.h"
 #include "asm/uaccess.h"
 #include "kern_util.h"
diff -NurpP --minimal linux-2.6.19/arch/um/sys-x86_64/syscalls.c
linux-2.6.19-vs2.1.x-t1/arch/um/sys-x86_64/syscalls.c --- linux-2.6.19/arch/um/sys-x86_64/syscalls.c 2006-11-30 21:18:36 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/um/sys-x86_64/syscalls.c 2006-11-08 04:57:40 +0100 @@ -9,6 +9,7 @@ #include "linux/shm.h" #include "linux/utsname.h" #include "linux/personality.h" +#include "linux/vs_cvirt.h" #include "asm/uaccess.h" #define __FRAME_OFFSETS #include "asm/ptrace.h" diff -NurpP --minimal linux-2.6.19/arch/v850/Kconfig linux-2.6.19-vs2.1.x-t1/arch/v850/Kconfig --- linux-2.6.19/arch/v850/Kconfig 2006-06-18 04:52:42 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/v850/Kconfig 2006-11-08 04:57:40 +0100 @@ -326,6 +326,8 @@ source "drivers/usb/Kconfig" source "arch/v850/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff -NurpP --minimal linux-2.6.19/arch/v850/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/v850/kernel/process.c --- linux-2.6.19/arch/v850/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/v850/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -83,7 +83,7 @@ int kernel_thread (int (*fn)(void *), vo /* Clone this thread. Note that we don't pass the clone syscall's second argument -- it's ignored for calls from kernel mode (the child's SP is always set to the top of the kernel stack). 
*/
-	arg0 = flags | CLONE_VM;
+	arg0 = flags | CLONE_VM | CLONE_KTHREAD;
 	syscall = __NR_clone;
 	asm volatile ("trap " SYSCALL_SHORT_TRAP
 		      : "=r" (ret), "=r" (syscall)
diff -NurpP --minimal linux-2.6.19/arch/v850/kernel/ptrace.c linux-2.6.19-vs2.1.x-t1/arch/v850/kernel/ptrace.c
--- linux-2.6.19/arch/v850/kernel/ptrace.c	2006-04-09 13:49:44 +0200
+++ linux-2.6.19-vs2.1.x-t1/arch/v850/kernel/ptrace.c	2006-11-30 18:53:18 +0100
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -117,6 +118,12 @@ long arch_ptrace(struct task_struct *chi
 {
 	int rval;

+	/* rval is still uninitialised here: set it before leaving */
+	if (!vx_check(vx_task_xid(child), VS_WATCH_P|VS_IDENT)) {
+		rval = -ESRCH;
+		goto out;
+	}
+
 	switch (request) {
 		unsigned long val, copied;

diff -NurpP --minimal linux-2.6.19/arch/x86_64/Kconfig linux-2.6.19-vs2.1.x-t1/arch/x86_64/Kconfig
--- linux-2.6.19/arch/x86_64/Kconfig	2006-11-30 21:18:36 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/Kconfig	2006-11-08 04:57:40 +0100
@@ -701,6 +701,8 @@ endmenu

 source "arch/x86_64/Kconfig.debug"

+source "kernel/vserver/Kconfig"
+
 source "security/Kconfig"

 source "crypto/Kconfig"
diff -NurpP --minimal linux-2.6.19/arch/x86_64/ia32/ia32_aout.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32_aout.c
--- linux-2.6.19/arch/x86_64/ia32/ia32_aout.c	2006-11-30 21:18:37 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32_aout.c	2006-11-08 04:57:40 +0100
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff -NurpP --minimal linux-2.6.19/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32_binfmt.c
--- linux-2.6.19/arch/x86_64/ia32/ia32_binfmt.c	2006-11-30 21:18:37 +0100
+++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32_binfmt.c	2006-11-08 04:57:47 +0100
@@ -375,7 +375,8 @@ int ia32_setup_arg_pages(struct linux_bi
 			kmem_cache_free(vm_area_cachep, mpnt);
 			return ret;
 		}
-		mm->stack_vm = mm->total_vm = vma_pages(mpnt);
+		vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt));
+		mm->stack_vm = mm->total_vm;
 	}

 	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
diff -NurpP
--minimal linux-2.6.19/arch/x86_64/ia32/ia32entry.S linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32entry.S --- linux-2.6.19/arch/x86_64/ia32/ia32entry.S 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/ia32entry.S 2006-11-08 04:57:41 +0100 @@ -672,7 +672,7 @@ ia32_sys_call_table: .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys32_vserver .quad sys_mbind .quad compat_sys_get_mempolicy /* 275 */ .quad sys_set_mempolicy diff -NurpP --minimal linux-2.6.19/arch/x86_64/ia32/sys_ia32.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/sys_ia32.c --- linux-2.6.19/arch/x86_64/ia32/sys_ia32.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/sys_ia32.c 2006-11-08 04:57:44 +0100 @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -454,7 +455,7 @@ sys32_gettimeofday(struct compat_timeval { if (tv) { struct timeval ktv; - do_gettimeofday(&ktv); + vx_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; } diff -NurpP --minimal linux-2.6.19/arch/x86_64/ia32/syscall32.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/syscall32.c --- linux-2.6.19/arch/x86_64/ia32/syscall32.c 2005-10-28 20:49:18 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/ia32/syscall32.c 2006-11-08 04:57:47 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ int syscall32_setup_pages(struct linux_b kmem_cache_free(vm_area_cachep, vma); return ret; } - mm->total_vm += npages; + vx_vmpages_add(mm, npages); up_write(&mm->mmap_sem); return 0; } diff -NurpP --minimal linux-2.6.19/arch/x86_64/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/irq.c --- linux-2.6.19/arch/x86_64/kernel/irq.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/irq.c 2006-11-30 18:28:58 +0100 @@ -123,7 +123,6 @@ asmlinkage unsigned int do_IRQ(struct pt else printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", 
__func__, smp_processor_id(), vector); - irq_exit(); set_irq_regs(old_regs); diff -NurpP --minimal linux-2.6.19/arch/x86_64/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/process.c --- linux-2.6.19/arch/x86_64/kernel/process.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/process.c 2006-11-30 20:55:45 +0100 @@ -54,7 +54,8 @@ asmlinkage extern void ret_from_fork(void); -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; +unsigned long kernel_thread_flags = + CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -301,8 +302,8 @@ void __show_regs(struct pt_regs * regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), + printk("Pid: %d:#%u, comm: %.20s %s %s %.*s\n", + current->pid, current->xid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); diff -NurpP --minimal linux-2.6.19/arch/x86_64/kernel/sys_x86_64.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/sys_x86_64.c --- linux-2.6.19/arch/x86_64/kernel/sys_x86_64.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/sys_x86_64.c 2006-11-08 04:57:40 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/arch/x86_64/kernel/traps.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/traps.c --- linux-2.6.19/arch/x86_64/kernel/traps.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/kernel/traps.c 2006-11-30 20:55:45 +0100 @@ -487,8 +487,9 @@ void show_registers(struct pt_regs *regs printk("CPU %d ", cpu); __show_regs(regs); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); + printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n", + cur->comm, cur->pid, cur->xid, + task_thread_info(cur), cur); 
/* * When in-kernel, we also print out the stack and code at the @@ -657,8 +658,8 @@ static void __kprobes do_trap(int trapnr if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, signr)) printk(KERN_INFO - "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, str, + "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, str, regs->rip, regs->rsp, error_code); if (info) @@ -758,8 +759,8 @@ asmlinkage void __kprobes do_general_pro if (user_mode(regs)) { if (exception_trace && unhandled_signal(tsk, SIGSEGV)) printk(KERN_INFO - "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", - tsk->comm, tsk->pid, + "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, regs->rip, regs->rsp, error_code); force_sig(SIGSEGV, tsk); diff -NurpP --minimal linux-2.6.19/arch/x86_64/mm/fault.c linux-2.6.19-vs2.1.x-t1/arch/x86_64/mm/fault.c --- linux-2.6.19/arch/x86_64/mm/fault.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/x86_64/mm/fault.c 2006-11-08 04:57:42 +0100 @@ -514,10 +514,10 @@ bad_area_nosemaphore: if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d:#%u]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->rip, - regs->rsp, error_code); + tsk->comm, tsk->pid, tsk->xid, address, + regs->rip, regs->rsp, error_code); } tsk->thread.cr2 = address; diff -NurpP --minimal linux-2.6.19/arch/xtensa/kernel/irq.c linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/irq.c --- linux-2.6.19/arch/xtensa/kernel/irq.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/irq.c 2006-11-30 18:29:29 +0100 @@ -63,9 +63,7 @@ unsigned int do_IRQ(int irq, struct pt_ sp - sizeof(struct thread_info)); } #endif - __do_IRQ(irq, regs); - irq_exit(); return 1; diff -NurpP --minimal linux-2.6.19/arch/xtensa/kernel/process.c linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/process.c --- linux-2.6.19/arch/xtensa/kernel/process.c 2006-09-20 16:58:06 +0200 +++ linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/process.c 2006-11-08 04:57:50 +0100 @@ -206,7 +206,7 @@ int kernel_thread(int (*fn)(void *), voi :"=r" (retval) :"i" (__NR_clone), "i" (__NR_exit), "r" (arg), "r" (fn), - "r" (flags | CLONE_VM) + "r" (flags | CLONE_VM | CLONE_KTHREAD) : "a2", "a3", "a4", "a5", "a6" ); return retval; } diff -NurpP --minimal linux-2.6.19/arch/xtensa/kernel/syscalls.c linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/syscalls.c --- linux-2.6.19/arch/xtensa/kernel/syscalls.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/arch/xtensa/kernel/syscalls.c 2006-11-08 04:57:40 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/block/cfq-iosched.c linux-2.6.19-vs2.1.x-t1/block/cfq-iosched.c --- linux-2.6.19/block/cfq-iosched.c 2006-11-30 21:18:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/block/cfq-iosched.c 2006-11-08 21:52:08 +0100 @@ -221,6 +221,8 @@ static int cfq_queue_empty(request_queue static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) { + if (task->xid) + return task->xid + (1 << 16); if (rw == READ || rw == WRITE_SYNC) return task->pid; diff -NurpP --minimal 
linux-2.6.19/drivers/block/Kconfig linux-2.6.19-vs2.1.x-t1/drivers/block/Kconfig --- linux-2.6.19/drivers/block/Kconfig 2006-11-30 21:18:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/block/Kconfig 2006-11-08 04:57:51 +0100 @@ -316,6 +316,13 @@ config BLK_DEV_CRYPTOLOOP instead, which can be configured to be on-disk compatible with the cryptoloop device. +config BLK_DEV_VROOT + tristate "Virtual Root device support" + depends on QUOTACTL + ---help--- + Saying Y here will allow you to use quota/fs ioctls on a shared + partition within a virtual server without compromising security. + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff -NurpP --minimal linux-2.6.19/drivers/block/Makefile linux-2.6.19-vs2.1.x-t1/drivers/block/Makefile --- linux-2.6.19/drivers/block/Makefile 2006-06-18 04:52:46 +0200 +++ linux-2.6.19-vs2.1.x-t1/drivers/block/Makefile 2006-11-08 04:57:51 +0100 @@ -29,4 +29,5 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryp obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o diff -NurpP --minimal linux-2.6.19/drivers/block/loop.c linux-2.6.19-vs2.1.x-t1/drivers/block/loop.c --- linux-2.6.19/drivers/block/loop.c 2006-11-30 21:18:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/block/loop.c 2006-11-30 18:53:18 +0100 @@ -74,6 +74,7 @@ #include #include #include +#include #include @@ -795,6 +796,7 @@ static int loop_set_fd(struct loop_devic lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; + lo->lo_xid = vx_current_xid(); lo->lo_backing_file = file; lo->transfer = transfer_none; lo->ioctl = NULL; @@ -935,7 +937,7 @@ loop_set_status(struct loop_device *lo, struct loop_func_table *xfer; if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) return -EPERM; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1015,7 +1017,8 @@ 
loop_get_status(struct loop_device *lo,
 	memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
 	info->lo_encrypt_type =
 		lo->lo_encryption ? lo->lo_encryption->number : 0;
-	if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
+	if (lo->lo_encrypt_key_size &&
+		vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
 		info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
 		memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
 		       lo->lo_encrypt_key_size);
@@ -1326,6 +1329,9 @@ static int lo_open(struct inode *inode,
 {
 	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;

+	if (!vx_check(lo->lo_xid, VS_WATCH_P|VS_IDENT))
+		return -EACCES;
+
 	mutex_lock(&lo->lo_ctl_mutex);
 	lo->lo_refcnt++;
 	mutex_unlock(&lo->lo_ctl_mutex);
diff -NurpP --minimal linux-2.6.19/drivers/block/vroot.c linux-2.6.19-vs2.1.x-t1/drivers/block/vroot.c
--- linux-2.6.19/drivers/block/vroot.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.19-vs2.1.x-t1/drivers/block/vroot.c	2006-11-30 19:40:17 +0100
@@ -0,0 +1,307 @@
+/*
+ *  linux/drivers/block/vroot.c
+ *
+ *  written by Herbert Pötzl, 9/11/2002
+ *  ported to 2.6.10 by Herbert Pötzl, 30/12/2004
+ *
+ *  based on the loop.c code by Theodore Ts'o.
+ *
+ * Copyright (C) 2002-2006 by Herbert Pötzl.
+ * Redistribution of this file is permitted under the
+ * GNU General Public License.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+static int max_vroot = 8;
+
+static struct vroot_device *vroot_dev;
+static struct gendisk **disks;
+
+
+/*
+ * Bind the block device behind file descriptor 'arg' to 'vr'.
+ * Returns 0, -EBUSY unless 'vr' is unbound, -EBADF if fget(arg) fails,
+ * -EINVAL if the file is not a block device.  vr_file/bdev are unused.
+ */
+static int vroot_set_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev,
+	unsigned int arg)
+{
+	struct block_device *real_bdev;
+	struct file *file;
+	struct inode *inode;
+	int error;
+
+	error = -EBUSY;
+	if (vr->vr_state != Vr_unbound)
+		goto out;
+
+	error = -EBADF;
+	file = fget(arg);
+	if (!file)
+		goto out;
+
+	error = -EINVAL;
+	inode = file->f_dentry->d_inode;
+
+
+	if (S_ISBLK(inode->i_mode)) {
+		real_bdev = inode->i_bdev;
+		vr->vr_device = real_bdev;
+		__iget(real_bdev->bd_inode);
+	} else
+		goto out_fput;
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_set_dev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	vr->vr_state = Vr_bound;
+	error = 0;
+
+ out_fput:
+	fput(file);
+ out:
+	return error;
+}
+
+/*
+ * Unbind the real device from 'vr' and release it with bdput().
+ * Returns 0, -ENXIO if nothing is bound, -EBUSY if vr_refcnt > 1.
+ */
+static int vroot_clr_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev)
+{
+	struct block_device *real_bdev;
+
+	if (vr->vr_state != Vr_bound)
+		return -ENXIO;
+	if (vr->vr_refcnt > 1)	/* we needed one fd for the ioctl */
+		return -EBUSY;
+
+	real_bdev = vr->vr_device;
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_clr_dev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	bdput(real_bdev);
+	vr->vr_state = Vr_unbound;
+	vr->vr_device = NULL;
+	return 0;
+}
+
+
+/* dispatch VROOT_SET_DEV / VROOT_CLR_DEV while holding vr_ctl_mutex */
+static int vr_ioctl(struct inode * inode, struct file * file,
+	unsigned int cmd, unsigned long arg)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+	int err;
+
+	down(&vr->vr_ctl_mutex);
+	switch (cmd) {
+	case VROOT_SET_DEV:
+		err = vroot_set_dev(vr, file, inode->i_bdev, arg);
+		break;
+	case VROOT_CLR_DEV:
+		err = vroot_clr_dev(vr, file, inode->i_bdev);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+	up(&vr->vr_ctl_mutex);
+	return err;
+}
+
+static int vr_open(struct inode *inode, struct file *file)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+
+	down(&vr->vr_ctl_mutex);
+	vr->vr_refcnt++;
+	up(&vr->vr_ctl_mutex);
+	return 0;
+}
+
+static int vr_release(struct inode *inode, struct file *file)
+{
+	struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
+
+	down(&vr->vr_ctl_mutex);
+	--vr->vr_refcnt;
+	up(&vr->vr_ctl_mutex);
+	return 0;
+}
+
+static struct block_device_operations vr_fops = {
+	.owner =	THIS_MODULE,
+	.open =		vr_open,
+	.release =	vr_release,
+	.ioctl =	vr_ioctl,
+};
+
+/*
+ * Map a vroot device to the real block device bound to it.
+ * Returns the real device after __iget() on its inode, or
+ * ERR_PTR(-ENXIO) if the minor is out of range or nothing is bound.
+ */
+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
+{
+	struct inode *inode = bdev->bd_inode;
+	struct vroot_device *vr;
+	struct block_device *real_bdev;
+	int minor = iminor(inode);
+
+	/* vroot_dev[] holds only max_vroot entries */
+	if (minor < 0 || minor >= max_vroot)
+		return ERR_PTR(-ENXIO);
+
+	vr = &vroot_dev[minor];
+
+	/* vr_device is NULL while unbound: test before it is used below */
+	if (vr->vr_state != Vr_bound)
+		return ERR_PTR(-ENXIO);
+
+	real_bdev = vr->vr_device;
+
+	vxdprintk(VXD_CBIT(misc, 0),
+		"vroot[%d]_get_real_bdev: dev=" VXF_DEV,
+		vr->vr_number, VXD_DEV(real_bdev));
+
+	__iget(real_bdev->bd_inode);
+	return real_bdev;
+}
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+module_param(max_vroot, int, 0);
+
+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
+
+MODULE_AUTHOR ("Herbert Pötzl");
+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
+
+
+/*
+ * Register the block major, allocate max_vroot devices and gendisks,
+ * register the get_real_bdev hook, then add the disks.
+ */
+int __init vroot_init(void)
+{
+	int err, i;
+
+	if (max_vroot < 1 || max_vroot > 256) {
+		max_vroot = MAX_VROOT_DEFAULT;
+		printk(KERN_WARNING "vroot: invalid max_vroot "
+			"(must be between 1 and 256), "
+			"using default (%d)\n", max_vroot);
+	}
+
+	if (register_blkdev(VROOT_MAJOR, "vroot"))
+		return -EIO;
+
+	err = -ENOMEM;
+	vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
+	if (!vroot_dev)
+		goto out_mem1;
+	memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
+
+	disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
+	if (!disks)
+		goto out_mem2;
+
+	for (i = 0; i < max_vroot; i++) {
+		disks[i] = alloc_disk(1);
+		if (!disks[i])
+			goto out_mem3;
+	}
+
+	for (i = 0; i < max_vroot; i++) {
+		struct vroot_device *vr = &vroot_dev[i];
+		struct gendisk *disk = disks[i];
+
+		memset(vr, 0, sizeof(*vr));
+		init_MUTEX(&vr->vr_ctl_mutex);
+		vr->vr_number = i;
+		disk->major = VROOT_MAJOR;
+		disk->first_minor = i;
+		disk->fops = &vr_fops;
+		sprintf(disk->disk_name, "vroot%d", i);
+		disk->private_data = vr;
+	}
+
+	err = register_vroot_grb(&__vroot_get_real_bdev);
+	if (err)
+		goto out_mem3;
+
+	for (i = 0; i < max_vroot; i++)
+		add_disk(disks[i]);
+	printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
+	return 0;
+
+out_mem3:
+	/* i counts the disks allocated so far (max_vroot after the loops) */
+	while (i--)
+		put_disk(disks[i]);
+	kfree(disks);
+out_mem2:
+	kfree(vroot_dev);
+out_mem1:
+	unregister_blkdev(VROOT_MAJOR, "vroot");
+	printk(KERN_ERR "vroot: initialization failed (%d)\n", err);
+	return err;
+}
+
+void vroot_exit(void)
+{
+	int i;
+
+	if (unregister_vroot_grb(&__vroot_get_real_bdev))
+		printk(KERN_WARNING "vroot: cannot unregister grb\n");
+
+	for (i = 0; i < max_vroot; i++) {
+		del_gendisk(disks[i]);
+		put_disk(disks[i]);
+	}
+	if (unregister_blkdev(VROOT_MAJOR, "vroot"))
+		printk(KERN_WARNING "vroot: cannot unregister blkdev\n");
+
+	kfree(disks);
+	kfree(vroot_dev);
+}
+
+module_init(vroot_init);
+module_exit(vroot_exit);
+
+#ifndef MODULE
+
+static int __init max_vroot_setup(char *str)
+{
+	max_vroot = simple_strtol(str, NULL, 0);
+	return 1;
+}
+
+__setup("max_vroot=", max_vroot_setup);
+
+#endif
+
diff -NurpP --minimal linux-2.6.19/drivers/char/random.c linux-2.6.19-vs2.1.x-t1/drivers/char/random.c
--- linux-2.6.19/drivers/char/random.c	2006-11-30 21:18:40 +0100
+++ linux-2.6.19-vs2.1.x-t1/drivers/char/random.c	2006-11-08 04:57:40 +0100
@@ -1178,7 +1178,7 @@ static char sysctl_bootid[16];
 static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	ctl_table fake_table;
+	ctl_table fake_table = {0};
 	unsigned char buf[64], tmp_uuid[16], *uuid;

 	uuid = table->data;
diff -NurpP --minimal linux-2.6.19/drivers/char/sysrq.c linux-2.6.19-vs2.1.x-t1/drivers/char/sysrq.c
--- linux-2.6.19/drivers/char/sysrq.c	2006-11-30 21:18:41 +0100
+++ linux-2.6.19-vs2.1.x-t1/drivers/char/sysrq.c	2006-11-30 19:40:51 +0100
@@ -260,6 +260,21 @@ static struct sysrq_key_op sysrq_unrt_op
 	.enable_mask	= SYSRQ_ENABLE_RTNICE,
 };

+
+#ifdef CONFIG_VSERVER_DEBUG
+static void sysrq_handle_vxinfo(int key, struct tty_struct *tty)
+{
+	dump_vx_info_inactive((key == 'x')?0:1);
+}
+
+static struct sysrq_key_op sysrq_showvxinfo_op = {
+	.handler	= sysrq_handle_vxinfo,
+	.help_msg	= "conteXt",
+	.action_msg	= "Show Context Info",
+	.enable_mask	= SYSRQ_ENABLE_DUMP,
+};
+#endif
+
 /* Key Operations table and lock */
 static DEFINE_SPINLOCK(sysrq_key_table_lock);

@@ -304,7 +319,11 @@ static struct sysrq_key_op *sysrq_key_ta
 	/* May be assigned at init time by SMP VOYAGER */
 	NULL,				/* v */
 	NULL,				/* w */
+#ifdef CONFIG_VSERVER_DEBUG
+	&sysrq_showvxinfo_op,		/* x */
+#else
 	NULL,				/* x */
+#endif
 	NULL,				/* y */
 	NULL				/* z */
 };
@@ -318,6 +337,8 @@
static int sysrq_key_table_key2index(int retval = key - '0'; else if ((key >= 'a') && (key <= 'z')) retval = key + 10 - 'a'; + else if ((key >= 'A') && (key <= 'Z')) + retval = key + 10 - 'A'; else retval = -1; return retval; diff -NurpP --minimal linux-2.6.19/drivers/char/tty_io.c linux-2.6.19-vs2.1.x-t1/drivers/char/tty_io.c --- linux-2.6.19/drivers/char/tty_io.c 2006-11-30 21:18:41 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/char/tty_io.c 2006-11-08 04:57:52 +0100 @@ -103,6 +103,7 @@ #include #include +#include #undef TTY_DEBUG_HANGUP @@ -2941,13 +2942,16 @@ static int tiocsctty(struct tty_struct * static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + pid_t pgrp; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + + pgrp = vx_map_pid(real_tty->pgrp); + return put_user(pgrp, p); } /** @@ -2977,6 +2981,8 @@ static int tiocspgrp(struct tty_struct * return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; + + pgrp = vx_rmap_pid(pgrp); if (pgrp < 0) return -EINVAL; if (session_of_pgrp(pgrp) != current->signal->session) diff -NurpP --minimal linux-2.6.19/drivers/infiniband/core/uverbs_mem.c linux-2.6.19-vs2.1.x-t1/drivers/infiniband/core/uverbs_mem.c --- linux-2.6.19/drivers/infiniband/core/uverbs_mem.c 2006-06-18 04:53:04 +0200 +++ linux-2.6.19-vs2.1.x-t1/drivers/infiniband/core/uverbs_mem.c 2006-11-08 04:57:47 +0100 @@ -36,6 +36,7 @@ #include #include +#include #include "uverbs.h" @@ -161,7 +162,7 @@ out: if (ret < 0) __ib_umem_release(dev, mem, 0); else - current->mm->locked_vm = locked; + vx_vmlocked_sub(current->mm, current->mm->locked_vm - locked); up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -174,8 +175,8 @@ void ib_umem_release(struct ib_device *d __ib_umem_release(dev, umem, 1); down_write(¤t->mm->mmap_sem); - current->mm->locked_vm -= - 
PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + vx_vmlocked_sub(current->mm, + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT); up_write(¤t->mm->mmap_sem); } @@ -184,7 +185,7 @@ static void ib_umem_account(void *work_p struct ib_umem_account_work *work = work_ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->diff; + vx_vmlocked_sub(work->mm, work->diff); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal linux-2.6.19/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-2.6.19-vs2.1.x-t1/drivers/infiniband/hw/ipath/ipath_user_pages.c --- linux-2.6.19/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-11-30 21:18:44 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/infiniband/hw/ipath/ipath_user_pages.c 2006-11-08 04:57:47 +0100 @@ -33,6 +33,7 @@ #include #include +#include #include "ipath_kernel.h" @@ -61,7 +62,8 @@ static int __get_user_pages(unsigned lon lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - if (num_pages > lock_limit) { + if (num_pages > lock_limit || + !vx_vmlocked_avail(current->mm, num_pages)) { ret = -ENOMEM; goto bail; } @@ -78,7 +80,7 @@ static int __get_user_pages(unsigned lon goto bail_release; } - current->mm->locked_vm += num_pages; + vx_vmlocked_add(current->mm, num_pages); ret = 0; goto bail; @@ -203,7 +205,7 @@ void ipath_release_user_pages(struct pag __ipath_release_user_pages(p, num_pages, 1); - current->mm->locked_vm -= num_pages; + vx_vmlocked_sub(current->mm, num_pages); up_write(¤t->mm->mmap_sem); } @@ -219,7 +221,7 @@ static void user_pages_account(void *ptr struct ipath_user_pages_work *work = ptr; down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->num_pages; + vx_vmlocked_sub(work->mm, work->num_pages); up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); diff -NurpP --minimal linux-2.6.19/drivers/md/dm-ioctl.c linux-2.6.19-vs2.1.x-t1/drivers/md/dm-ioctl.c --- linux-2.6.19/drivers/md/dm-ioctl.c 2006-11-30 21:18:46 +0100 +++ 
linux-2.6.19-vs2.1.x-t1/drivers/md/dm-ioctl.c 2006-11-30 19:57:46 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -100,7 +101,8 @@ static struct hash_cell *__get_name_cell unsigned int h = hash_str(str); list_for_each_entry (hc, _name_buckets + h, name_list) - if (!strcmp(hc->name, str)) { + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT) && + !strcmp(hc->name, str)) { dm_get(hc->md); return hc; } @@ -114,7 +116,8 @@ static struct hash_cell *__get_uuid_cell unsigned int h = hash_str(str); list_for_each_entry (hc, _uuid_buckets + h, uuid_list) - if (!strcmp(hc->uuid, str)) { + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT) && + !strcmp(hc->uuid, str)) { dm_get(hc->md); return hc; } @@ -349,6 +352,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl static int remove_all(struct dm_ioctl *param, size_t param_size) { + if (!vx_check(0, VS_ADMIN)) + return -EPERM; + dm_hash_remove_all(1); param->data_size = 0; return 0; @@ -396,6 +402,8 @@ static int list_devices(struct dm_ioctl */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT)) + continue; needed += sizeof(struct dm_name_list); needed += strlen(hc->name) + 1; needed += ALIGN_MASK; @@ -419,6 +427,8 @@ static int list_devices(struct dm_ioctl */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P|VS_IDENT)) + continue; if (old_nl) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); @@ -609,10 +619,11 @@ static struct hash_cell *__find_device_h if (!md) goto out; - mdptr = dm_get_mdptr(md); + if (vx_check(dm_get_xid(md), VS_WATCH_P|VS_IDENT)) + mdptr = dm_get_mdptr(md); + if (!mdptr) dm_put(md); - out: return mdptr; } @@ -1405,8 +1416,8 @@ static int ctl_ioctl(struct inode *inode ioctl_fn fn = NULL; size_t param_size; - /* only root can play with this */ - if (!capable(CAP_SYS_ADMIN)) + /* only root 
and certain contexts can play with this */ + if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER)) return -EACCES; if (_IOC_TYPE(command) != DM_IOCTL) diff -NurpP --minimal linux-2.6.19/drivers/md/dm.c linux-2.6.19-vs2.1.x-t1/drivers/md/dm.c --- linux-2.6.19/drivers/md/dm.c 2006-11-30 21:18:46 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/md/dm.c 2006-11-30 18:53:18 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -75,6 +76,7 @@ struct mapped_device { rwlock_t map_lock; atomic_t holders; atomic_t open_count; + xid_t xid; unsigned long flags; @@ -220,6 +222,7 @@ static void __exit dm_exit(void) static int dm_blk_open(struct inode *inode, struct file *file) { struct mapped_device *md; + int ret = -ENXIO; spin_lock(&_minor_lock); @@ -228,18 +231,19 @@ static int dm_blk_open(struct inode *ino goto out; if (test_bit(DMF_FREEING, &md->flags) || - test_bit(DMF_DELETING, &md->flags)) { - md = NULL; + test_bit(DMF_DELETING, &md->flags)) + goto out; + + ret = -EACCES; + if (!vx_check(md->xid, VS_IDENT)) goto out; - } dm_get(md); atomic_inc(&md->open_count); - + ret = 0; out: spin_unlock(&_minor_lock); - - return md ? 
0 : -ENXIO; + return ret; } static int dm_blk_close(struct inode *inode, struct file *file) @@ -435,6 +439,14 @@ int dm_set_geometry(struct mapped_device return 0; } +/* + * Get the xid associated with a dm device + */ +xid_t dm_get_xid(struct mapped_device *md) +{ + return md->xid; +} + /*----------------------------------------------------------------- * CRUD START: * A more elegant soln is in the works that uses the queue @@ -952,6 +964,7 @@ static struct mapped_device *alloc_dev(i atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); + md->xid = vx_current_xid(); md->queue = blk_alloc_queue(GFP_KERNEL); if (!md->queue) diff -NurpP --minimal linux-2.6.19/drivers/md/dm.h linux-2.6.19-vs2.1.x-t1/drivers/md/dm.h --- linux-2.6.19/drivers/md/dm.h 2006-11-30 21:18:46 +0100 +++ linux-2.6.19-vs2.1.x-t1/drivers/md/dm.h 2006-11-08 04:57:52 +0100 @@ -72,6 +72,8 @@ void dm_put_target_type(struct target_ty int dm_target_iterate(void (*iter_func)(struct target_type *tt, void *param), void *param); +xid_t dm_get_xid(struct mapped_device *md); + /*----------------------------------------------------------------- * Useful inlines. *---------------------------------------------------------------*/ diff -NurpP --minimal linux-2.6.19/fs/attr.c linux-2.6.19-vs2.1.x-t1/fs/attr.c --- linux-2.6.19/fs/attr.c 2006-04-09 13:49:53 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/attr.c 2006-11-30 19:40:56 +0100 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... */ @@ -56,6 +59,30 @@ int inode_change_ok(struct inode *inode, if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VS_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + vxwprintk(1, "xid=%d messing with the barrier.", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + /* maybe allow that in the future? 
*/ + vxwprintk(1, "xid=%d messing with the procfs.", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + /* devpts is xid tagged */ + if (vx_check((xid_t)inode->i_tag, VS_IDENT)) + goto fine; + vxwprintk(1, "xid=%d messing with the devpts.", + vx_current_xid()); + goto error; + } fine: retval = 0; error: @@ -79,6 +106,8 @@ int inode_setattr(struct inode * inode, inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; if (ia_valid & ATTR_ATIME) inode->i_atime = timespec_trunc(attr->ia_atime, inode->i_sb->s_time_gran); @@ -153,7 +182,8 @@ int notify_change(struct dentry * dentry error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) error = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; if (!error) error = inode_setattr(inode, attr); diff -NurpP --minimal linux-2.6.19/fs/binfmt_aout.c linux-2.6.19-vs2.1.x-t1/fs/binfmt_aout.c --- linux-2.6.19/fs/binfmt_aout.c 2006-11-30 21:19:18 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/binfmt_aout.c 2006-11-08 04:57:40 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/fs/binfmt_elf.c linux-2.6.19-vs2.1.x-t1/fs/binfmt_elf.c --- linux-2.6.19/fs/binfmt_elf.c 2006-11-30 21:19:18 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/binfmt_elf.c 2006-11-08 04:57:40 +0100 @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/fs/binfmt_elf_fdpic.c linux-2.6.19-vs2.1.x-t1/fs/binfmt_elf_fdpic.c --- linux-2.6.19/fs/binfmt_elf_fdpic.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/binfmt_elf_fdpic.c 2006-11-08 04:57:40 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/fs/binfmt_flat.c linux-2.6.19-vs2.1.x-t1/fs/binfmt_flat.c --- linux-2.6.19/fs/binfmt_flat.c 2006-09-20 16:58:34 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/binfmt_flat.c 2006-11-08 04:57:40 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/fs/binfmt_som.c linux-2.6.19-vs2.1.x-t1/fs/binfmt_som.c --- linux-2.6.19/fs/binfmt_som.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/binfmt_som.c 2006-11-08 04:57:40 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/fs/dcache.c linux-2.6.19-vs2.1.x-t1/fs/dcache.c --- linux-2.6.19/fs/dcache.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/dcache.c 2006-11-08 04:57:48 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" @@ -147,6 +148,7 @@ void dput(struct dentry *dentry) if (!dentry) return; + vx_dentry_dec(dentry); repeat: if (atomic_read(&dentry->d_count) 
== 1) might_sleep(); @@ -160,6 +162,8 @@ repeat: return; } + vx_dentry_dec(dentry); + /* * AV: ->d_delete() is _NOT_ allowed to block now. */ @@ -270,6 +274,7 @@ static inline struct dentry * __dget_loc if (!list_empty(&dentry->d_lru)) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); + vx_dentry_inc(dentry); } return dentry; } @@ -861,6 +866,9 @@ struct dentry *d_alloc(struct dentry * p struct dentry *dentry; char *dname; + if (!vx_dentry_avail(1)) + return NULL; + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; @@ -909,6 +917,7 @@ struct dentry *d_alloc(struct dentry * p if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; + vx_dentry_inc(dentry); spin_unlock(&dcache_lock); return dentry; @@ -1258,6 +1267,7 @@ struct dentry * __d_lookup(struct dentry if (!d_unhashed(dentry)) { atomic_inc(&dentry->d_count); + vx_dentry_inc(dentry); found = dentry; } spin_unlock(&dentry->d_lock); diff -NurpP --minimal linux-2.6.19/fs/devpts/inode.c linux-2.6.19-vs2.1.x-t1/fs/devpts/inode.c --- linux-2.6.19/fs/devpts/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/devpts/inode.c 2006-11-30 18:53:18 +0100 @@ -19,8 +19,22 @@ #include #include #include +#include -#define DEVPTS_SUPER_MAGIC 0x1cd1 + +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret = -EACCES; + + /* devpts is xid tagged */ + if (vx_check((xid_t)inode->i_tag, VS_WATCH_P|VS_IDENT)) + ret = generic_permission(inode, mask, NULL); + return ret; +} + +static struct inode_operations devpts_file_inode_operations = { + .permission = devpts_permission, +}; static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -91,6 +105,25 @@ static int devpts_remount(struct super_b return 0; } +static int devpts_filter(struct dentry *de) +{ + /* devpts is xid tagged */ + return vx_check((xid_t)de->d_inode->i_tag, VS_WATCH_P|VS_IDENT); +} + +static int devpts_readdir(struct file * filp, void 
* dirent, filldir_t filldir) +{ + return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); +} + +static struct file_operations devpts_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = devpts_readdir, +}; + static struct super_operations devpts_sops = { .statfs = simple_statfs, .remount_fs = devpts_remount, @@ -116,8 +149,10 @@ devpts_fill_super(struct super_block *s, inode->i_uid = inode->i_gid = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &devpts_dir_operations; inode->i_nlink = 2; + /* devpts is xid tagged */ + inode->i_tag = (tag_t)vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) @@ -175,6 +210,9 @@ int devpts_pty_new(struct tty_struct *tt inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + /* devpts is xid tagged */ + inode->i_tag = (tag_t)vx_current_xid(); + inode->i_op = &devpts_file_inode_operations; inode->i_private = tty; dentry = get_node(number); diff -NurpP --minimal linux-2.6.19/fs/dquot.c linux-2.6.19-vs2.1.x-t1/fs/dquot.c --- linux-2.6.19/fs/dquot.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/dquot.c 2006-11-30 19:41:09 +0100 @@ -185,7 +185,7 @@ static void put_quota_format(struct quot /* * Dquot List Management: * The quota code uses three lists for dquot management: the inuse_list, - * free_dquots, and dquot_hash[] array. A single dquot structure may be + * free_dquots, and hash->dqh_hash[] array. A single dquot structure may be * on all three lists, depending on its current state. 
* * All dquots are placed to the end of inuse_list when first created, and this @@ -198,7 +198,7 @@ static void put_quota_format(struct quot * dquot is invalidated it's completely released from memory. * * Dquots with a specific identity (device, type and id) are placed on - * one of the dquot_hash[] hash chains. The provides an efficient search + * one of the hash->dqh_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. */ @@ -212,36 +212,44 @@ struct dqstats dqstats; static void dqput(struct dquot *dquot); static inline unsigned int -hashfn(const struct super_block *sb, unsigned int id, int type) +hashfn(struct dqhash *hash, unsigned int id, int type) { unsigned long tmp; - tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); + tmp = (((unsigned long)hash >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask; } /* * Following list functions expect dq_list_lock to be held */ -static inline void insert_dquot_hash(struct dquot *dquot) +static inline void insert_dquot_hash(struct dqhash *hash, struct dquot *dquot) { - struct hlist_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); + struct hlist_head *head = dquot_hash + + hashfn(hash, dquot->dq_id, dquot->dq_type); + /* struct hlist_head *head = hash->dqh_hash + + hashfn(dquot->dq_dqh, dquot->dq_id, dquot->dq_type); */ hlist_add_head(&dquot->dq_hash, head); + dquot->dq_dqh = dqhget(hash); } static inline void remove_dquot_hash(struct dquot *dquot) { hlist_del_init(&dquot->dq_hash); + dqhput(dquot->dq_dqh); + dquot->dq_dqh = NULL; } -static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type) +static inline struct dquot *find_dquot(struct dqhash *hash, + unsigned int hashent, unsigned int id, int type) { struct hlist_node *node; struct dquot *dquot; - hlist_for_each (node, dquot_hash+hashent) { + /* hlist_for_each (node, hash->dqh_hash + 
hashent) { */ + hlist_for_each (node, dquot_hash + hashent) { dquot = hlist_entry(node, struct dquot, dq_hash); - if (dquot->dq_sb == sb && dquot->dq_id == id && dquot->dq_type == type) + if (dquot->dq_dqh == hash && dquot->dq_id == id && dquot->dq_type == type) return dquot; } return NODQUOT; @@ -285,13 +293,13 @@ static void wait_on_dquot(struct dquot * mutex_unlock(&dquot->dq_lock); } -#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) +#define mark_dquot_dirty(dquot) ((dquot)->dq_dqh->dqh_qop->mark_dirty(dquot)) int dquot_mark_dquot_dirty(struct dquot *dquot) { spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) - list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> + list_add(&dquot->dq_dirty, &dqh_dqopt(dquot->dq_dqh)-> info[dquot->dq_type].dqi_dirty_list); spin_unlock(&dq_list_lock); return 0; @@ -306,9 +314,9 @@ static inline int clear_dquot_dirty(stru return 1; } -void mark_info_dirty(struct super_block *sb, int type) +void mark_info_dirty(struct dqhash *hash, int type) { - set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags); + set_bit(DQF_INFO_DIRTY_B, &dqh_dqopt(hash)->info[type].dqi_flags); } EXPORT_SYMBOL(mark_info_dirty); @@ -319,7 +327,7 @@ EXPORT_SYMBOL(mark_info_dirty); int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); mutex_lock(&dquot->dq_lock); mutex_lock(&dqopt->dqio_mutex); @@ -333,7 +341,7 @@ int dquot_acquire(struct dquot *dquot) ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); /* Write the info if needed */ if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret < 0) goto out_iolock; if (ret2 < 0) { @@ -354,7 +362,7 @@ out_iolock: int dquot_commit(struct dquot *dquot) { int ret = 0, ret2 = 0; 
- struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); mutex_lock(&dqopt->dqio_mutex); spin_lock(&dq_list_lock); @@ -368,7 +376,7 @@ int dquot_commit(struct dquot *dquot) if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret >= 0) ret = ret2; } @@ -383,7 +391,7 @@ out_sem: int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh); mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ @@ -394,7 +402,7 @@ int dquot_release(struct dquot *dquot) ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); /* Write the info */ if (info_dirty(&dqopt->info[dquot->dq_type])) - ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type); + ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type); if (ret >= 0) ret = ret2; } @@ -411,14 +419,14 @@ out_dqlock: * just deleted or pruned by prune_icache() (those are not attached to any * list). We have to wait for such users. 
*/ -static void invalidate_dquots(struct super_block *sb, int type) +static void invalidate_dquots(struct dqhash *hash, int type) { struct dquot *dquot, *tmp; restart: spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { - if (dquot->dq_sb != sb) + if (dquot->dq_dqh != hash) continue; if (dquot->dq_type != type) continue; @@ -458,18 +466,18 @@ restart: spin_unlock(&dq_list_lock); } -int vfs_quota_sync(struct super_block *sb, int type) +int vfs_quota_sync(struct dqhash *hash, int type) { struct list_head *dirty; struct dquot *dquot; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); int cnt; mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; spin_lock(&dq_list_lock); dirty = &dqopt->info[cnt].dqi_dirty_list; @@ -486,7 +494,7 @@ int vfs_quota_sync(struct super_block *s atomic_inc(&dquot->dq_count); dqstats.lookups++; spin_unlock(&dq_list_lock); - sb->dq_op->write_dquot(dquot); + hash->dqh_qop->write_dquot(dquot); dqput(dquot); spin_lock(&dq_list_lock); } @@ -494,9 +502,10 @@ int vfs_quota_sync(struct super_block *s } for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) + if ((cnt == type || type == -1) + && dqh_has_quota_enabled(hash, cnt) && info_dirty(&dqopt->info[cnt])) - sb->dq_op->write_info(sb, cnt); + hash->dqh_qop->write_info(hash, cnt); spin_lock(&dq_list_lock); dqstats.syncs++; spin_unlock(&dq_list_lock); @@ -551,7 +560,7 @@ static void dqput(struct dquot *dquot) if (!atomic_read(&dquot->dq_count)) { printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", - dquot->dq_sb->s_id, + dquot->dq_dqh->dqh_sb->s_id, quotatypes[dquot->dq_type], dquot->dq_id); BUG(); @@ -567,7 +576,7 @@ we_slept: /* We have more than one user... 
nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ - if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) && + if (!dqh_has_quota_enabled(dquot->dq_dqh, dquot->dq_type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot->dq_wait_unused); spin_unlock(&dq_list_lock); @@ -577,14 +586,14 @@ we_slept: if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ - dquot->dq_sb->dq_op->write_dquot(dquot); + dquot->dq_dqh->dqh_qop->write_dquot(dquot); goto we_slept; } /* Clear flag in case dquot was inactive (something bad happened) */ clear_dquot_dirty(dquot); if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); - dquot->dq_sb->dq_op->release_dquot(dquot); + dquot->dq_dqh->dqh_qop->release_dquot(dquot); goto we_slept; } atomic_dec(&dquot->dq_count); @@ -596,7 +605,7 @@ we_slept: spin_unlock(&dq_list_lock); } -static struct dquot *get_empty_dquot(struct super_block *sb, int type) +static struct dquot *get_empty_dquot(int type) { struct dquot *dquot; @@ -611,7 +620,7 @@ static struct dquot *get_empty_dquot(str INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); init_waitqueue_head(&dquot->dq_wait_unused); - dquot->dq_sb = sb; + dquot->dq_dqh = NULL; dquot->dq_type = type; atomic_set(&dquot->dq_count, 1); @@ -622,19 +631,19 @@ static struct dquot *get_empty_dquot(str * Get reference to dquot * MUST be called with either dqptr_sem or dqonoff_mutex held */ -static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) +static struct dquot *dqget(struct dqhash *hash, unsigned int id, int type) { - unsigned int hashent = hashfn(sb, id, type); + unsigned int hashent = hashfn(hash, id, type); struct dquot *dquot, *empty = NODQUOT; - if (!sb_has_quota_enabled(sb, type)) + if (!dqh_has_quota_enabled(hash, type)) return NODQUOT; we_slept: spin_lock(&dq_list_lock); - if ((dquot = find_dquot(hashent, sb, id, type)) 
== NODQUOT) { + if ((dquot = find_dquot(hash, hashent, id, type)) == NODQUOT) { if (empty == NODQUOT) { spin_unlock(&dq_list_lock); - if ((empty = get_empty_dquot(sb, type)) == NODQUOT) + if ((empty = get_empty_dquot(type)) == NODQUOT) schedule(); /* Try to wait for a moment... */ goto we_slept; } @@ -643,7 +652,7 @@ we_slept: /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ - insert_dquot_hash(dquot); + insert_dquot_hash(hash, dquot); dqstats.lookups++; spin_unlock(&dq_list_lock); } else { @@ -660,12 +669,13 @@ we_slept: * finished or it will be canceled due to dq_count > 1 test */ wait_on_dquot(dquot); /* Read the dquot and instantiate it (everything done only if needed) */ - if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) { + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && + hash->dqh_qop->acquire_dquot(dquot) < 0) { dqput(dquot); return NODQUOT; } #ifdef __DQUOT_PARANOIA - BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ + BUG_ON(!dquot->dq_dqh); /* Has somebody invalidated entry under us? */ #endif return dquot; @@ -686,9 +696,10 @@ static int dqinit_needed(struct inode *i } /* This routine is guarded by dqonoff_mutex mutex */ -static void add_dquot_ref(struct super_block *sb, int type) +static void add_dquot_ref(struct dqhash *hash, int type) { struct list_head *p; + struct super_block *sb = hash->dqh_sb; restart: file_list_lock(); @@ -698,7 +709,7 @@ restart: if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) { struct dentry *dentry = dget(filp->f_dentry); file_list_unlock(); - sb->dq_op->initialize(inode, type); + hash->dqh_qop->initialize(inode, type); dput(dentry); /* As we may have blocked we had better restart... 
*/ goto restart; @@ -757,13 +768,13 @@ static void put_dquot_list(struct list_h } /* Gather all references from inodes and drop them */ -static void drop_dquot_ref(struct super_block *sb, int type) +static void drop_dquot_ref(struct dqhash *hash, int type) { LIST_HEAD(tofree_head); - down_write(&sb_dqopt(sb)->dqptr_sem); - remove_dquot_ref(sb, type, &tofree_head); - up_write(&sb_dqopt(sb)->dqptr_sem); + down_write(&dqh_dqopt(hash)->dqptr_sem); + remove_dquot_ref(hash, type, &tofree_head); + up_write(&dqh_dqopt(hash)->dqptr_sem); put_dquot_list(&tofree_head); } @@ -837,7 +848,7 @@ static void print_warning(struct dquot * mutex_lock(&tty_mutex); if (!current->signal->tty) goto out_lock; - tty_write_message(current->signal->tty, dquot->dq_sb->s_id); + tty_write_message(current->signal->tty, dquot->dq_dqh->dqh_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->signal->tty, ": warning, "); else @@ -879,7 +890,7 @@ static inline void flush_warnings(struct static inline char ignore_hardlimit(struct dquot *dquot) { - struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; + struct mem_dqinfo *info = &dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH)); @@ -911,7 +922,7 @@ static int check_idq(struct dquot *dquot (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime == 0) { *warntype = ISOFTWARN; - dquot->dq_dqb.dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; + dquot->dq_dqb.dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace; } return QUOTA_OK; @@ -946,7 +957,7 @@ static int check_bdq(struct dquot *dquot dquot->dq_dqb.dqb_btime == 0) { if (!prealloc) { *warntype = BSOFTWARN; - dquot->dq_dqb.dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; + 
dquot->dq_dqb.dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace; } else /* @@ -972,7 +983,7 @@ int dquot_initialize(struct inode *inode * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ if (IS_NOQUOTA(inode)) goto out_err; @@ -988,11 +999,11 @@ int dquot_initialize(struct inode *inode id = inode->i_gid; break; } - inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt); + inode->i_dquot[cnt] = dqget(inode->i_dqh, id, cnt); } } out_err: - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -1004,14 +1015,14 @@ int dquot_drop(struct inode *inode) { int cnt; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] != NODQUOT) { dqput(inode->i_dquot[cnt]); inode->i_dquot[cnt] = NODQUOT; } } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return 0; } @@ -1042,9 +1053,9 @@ out_add: for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); if (IS_NOQUOTA(inode)) { /* Now we can do reliable test... 
*/ - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); goto out_add; } spin_lock(&dq_data_lock); @@ -1069,7 +1080,7 @@ warn_put_all: if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings(inode->i_dquot, warntype); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -1087,9 +1098,9 @@ int dquot_alloc_inode(const struct inode return QUOTA_OK; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } spin_lock(&dq_data_lock); @@ -1114,7 +1125,7 @@ warn_put_all: if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings((struct dquot **)inode->i_dquot, warntype); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } @@ -1132,10 +1143,10 @@ out_sub: inode_sub_bytes(inode, number); return QUOTA_OK; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); goto out_sub; } spin_lock(&dq_data_lock); @@ -1150,7 +1161,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } @@ -1165,10 +1176,10 @@ int dquot_free_inode(const struct inode * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when 
holding dqptr_sem */ if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } spin_lock(&dq_data_lock); @@ -1182,7 +1193,7 @@ int dquot_free_inode(const struct inode for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } @@ -1197,6 +1208,7 @@ int dquot_transfer(struct inode *inode, qsize_t space; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; + struct dqhash *dqh = inode->i_sb->s_dqh; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; char warntype[MAXQUOTAS]; @@ -1210,10 +1222,10 @@ int dquot_transfer(struct inode *inode, transfer_to[cnt] = transfer_from[cnt] = NODQUOT; warntype[cnt] = NOWARN; } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? 
*/ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return QUOTA_OK; } /* First build the transfer_to list - here we can block on @@ -1224,12 +1236,12 @@ int dquot_transfer(struct inode *inode, case USRQUOTA: if (!chuid) continue; - transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_uid, cnt); + transfer_to[cnt] = dqget(dqh, iattr->ia_uid, cnt); break; case GRPQUOTA: if (!chgid) continue; - transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_gid, cnt); + transfer_to[cnt] = dqget(dqh, iattr->ia_gid, cnt); break; } } @@ -1284,20 +1296,20 @@ warn_put_all: if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT) dqput(transfer_to[cnt]); } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem); return ret; } /* * Write info of quota file to disk */ -int dquot_commit_info(struct super_block *sb, int type) +int dquot_commit_info(struct dqhash *hash, int type) { int ret; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); mutex_lock(&dqopt->dqio_mutex); - ret = dqopt->ops[type]->write_file_info(sb, type); + ret = dqopt->ops[type]->write_file_info(hash, type); mutex_unlock(&dqopt->dqio_mutex); return ret; } @@ -1347,10 +1359,10 @@ static inline void reset_enable_flags(st /* * Turn quota off on a device. 
type == -1 ==> quotaoff for all types (umount) */ -int vfs_quota_off(struct super_block *sb, int type) +int vfs_quota_off(struct dqhash *hash, int type) { int cnt; - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct inode *toputinode[MAXQUOTAS]; /* We need to serialize quota_off() for device */ @@ -1359,21 +1371,21 @@ int vfs_quota_off(struct super_block *sb toputinode[cnt] = NULL; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; reset_enable_flags(dqopt, cnt); /* Note: these are blocking operations */ - drop_dquot_ref(sb, cnt); - invalidate_dquots(sb, cnt); + drop_dquot_ref(hash, cnt); + invalidate_dquots(hash, cnt); /* * Now all dquots should be invalidated, all writes done so we should be only * users of the info. No locks needed. */ if (info_dirty(&dqopt->info[cnt])) - sb->dq_op->write_info(sb, cnt); + hash->dqh_qop->write_info(hash, cnt); if (dqopt->ops[cnt]->free_file_info) - dqopt->ops[cnt]->free_file_info(sb, cnt); + dqopt->ops[cnt]->free_file_info(hash, cnt); put_quota_format(dqopt->info[cnt].dqi_format); toputinode[cnt] = dqopt->files[cnt]; @@ -1386,9 +1398,9 @@ int vfs_quota_off(struct super_block *sb mutex_unlock(&dqopt->dqonoff_mutex); /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (hash->dqh_sb->s_op->sync_fs) + hash->dqh_sb->s_op->sync_fs(hash->dqh_sb, 1); + sync_blockdev(hash->dqh_sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. 
We @@ -1399,7 +1411,7 @@ int vfs_quota_off(struct super_block *sb mutex_lock(&dqopt->dqonoff_mutex); /* If quota was reenabled in the meantime, we have * nothing to do */ - if (!sb_has_quota_enabled(sb, cnt)) { + if (!dqh_has_quota_enabled(hash, cnt)) { mutex_lock(&toputinode[cnt]->i_mutex); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); @@ -1410,8 +1422,8 @@ int vfs_quota_off(struct super_block *sb } mutex_unlock(&dqopt->dqonoff_mutex); } - if (sb->s_bdev) - invalidate_bdev(sb->s_bdev, 0); + if (hash->dqh_sb->s_bdev) + invalidate_bdev(hash->dqh_sb->s_bdev, 0); return 0; } @@ -1424,7 +1436,8 @@ static int vfs_quota_on_inode(struct ino { struct quota_format_type *fmt = find_quota_format(format_id); struct super_block *sb = inode->i_sb; - struct quota_info *dqopt = sb_dqopt(sb); + struct dqhash *hash = inode->i_dqh; + struct quota_info *dqopt = dqh_dqopt(hash); int error; int oldflags = -1; @@ -1450,7 +1463,7 @@ static int vfs_quota_on_inode(struct ino invalidate_bdev(sb->s_bdev, 0); mutex_lock(&inode->i_mutex); mutex_lock(&dqopt->dqonoff_mutex); - if (sb_has_quota_enabled(sb, type)) { + if (dqh_has_quota_enabled(hash, type)) { error = -EBUSY; goto out_lock; } @@ -1461,21 +1474,21 @@ static int vfs_quota_on_inode(struct ino oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA); inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; up_write(&dqopt->dqptr_sem); - sb->dq_op->drop(inode); + hash->dqh_qop->drop(inode); error = -EIO; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) goto out_lock; error = -EINVAL; - if (!fmt->qf_ops->check_quota_file(sb, type)) + if (!fmt->qf_ops->check_quota_file(hash, type)) goto out_file_init; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); mutex_lock(&dqopt->dqio_mutex); - if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { + if ((error = dqopt->ops[type]->read_file_info(hash, type)) < 0) { 
mutex_unlock(&dqopt->dqio_mutex); goto out_file_init; } @@ -1483,7 +1496,7 @@ static int vfs_quota_on_inode(struct ino mutex_unlock(&inode->i_mutex); set_enable_flags(dqopt, type); - add_dquot_ref(sb, type); + add_dquot_ref(hash, type); mutex_unlock(&dqopt->dqonoff_mutex); return 0; @@ -1509,7 +1522,7 @@ out_fmt: } /* Actual function called from quotactl() */ -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path) { struct nameidata nd; int error; @@ -1521,7 +1534,7 @@ int vfs_quota_on(struct super_block *sb, if (error) goto out_path; /* Quota file not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) + if (nd.mnt->mnt_sb != hash->dqh_sb) error = -EXDEV; else error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); @@ -1534,13 +1547,13 @@ out_path: * This function is used when filesystem needs to initialize quotas * during mount time. */ -int vfs_quota_on_mount(struct super_block *sb, char *qf_name, +int vfs_quota_on_mount(struct dqhash *hash, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; - dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); + dentry = lookup_one_len(qf_name, hash->dqh_sb->s_root, strlen(qf_name)); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1576,18 +1589,18 @@ static void do_get_dqblk(struct dquot *d spin_unlock(&dq_data_lock); } -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) { struct dquot *dquot; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!(dquot = dqget(sb, id, type))) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_lock(&dqh_dqopt(hash)->dqonoff_mutex); + if (!(dquot = dqget(hash, id, type))) { + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return -ESRCH; } do_get_dqblk(dquot, di); dqput(dquot); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + 
mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return 0; } @@ -1627,7 +1640,7 @@ static void do_set_dqblk(struct dquot *d clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ - dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; + dm->dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { @@ -1635,7 +1648,7 @@ static void do_set_dqblk(struct dquot *d clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ - dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; + dm->dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); @@ -1645,53 +1658,53 @@ static void do_set_dqblk(struct dquot *d mark_dquot_dirty(dquot); } -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) +int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) { struct dquot *dquot; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!(dquot = dqget(sb, id, type))) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_lock(&dqh_dqopt(hash)->dqonoff_mutex); + if (!(dquot = dqget(hash, id, type))) { + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return -ESRCH; } do_set_dqblk(dquot, di); dqput(dquot); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return 0; } /* Generic routine for getting common part of quota file information */ -int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii) { struct 
mem_dqinfo *mi; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_enabled(sb, type)) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_lock(&dqh_dqopt(hash)->dqonoff_mutex); + if (!dqh_has_quota_enabled(hash, type)) { + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return -ESRCH; } - mi = sb_dqopt(sb)->info + type; + mi = dqh_dqopt(hash)->info + type; spin_lock(&dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = mi->dqi_igrace; ii->dqi_flags = mi->dqi_flags & DQF_MASK; ii->dqi_valid = IIF_ALL; spin_unlock(&dq_data_lock); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return 0; } /* Generic routine for setting common part of quota file information */ -int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) +int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_enabled(sb, type)) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_lock(&dqh_dqopt(hash)->dqonoff_mutex); + if (!dqh_has_quota_enabled(hash, type)) { + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return -ESRCH; } - mi = sb_dqopt(sb)->info + type; + mi = dqh_dqopt(hash)->info + type; spin_lock(&dq_data_lock); if (ii->dqi_valid & IIF_BGRACE) mi->dqi_bgrace = ii->dqi_bgrace; @@ -1700,10 +1713,10 @@ int vfs_set_dqinfo(struct super_block *s if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); spin_unlock(&dq_data_lock); - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); /* Force write to disk */ - sb->dq_op->write_info(sb, type); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + hash->dqh_qop->write_info(hash, type); + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); return 0; } diff -NurpP --minimal linux-2.6.19/fs/exec.c linux-2.6.19-vs2.1.x-t1/fs/exec.c --- linux-2.6.19/fs/exec.c 2006-11-30 21:19:19 +0100 +++ 
linux-2.6.19-vs2.1.x-t1/fs/exec.c 2006-11-08 04:57:47 +0100 @@ -49,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -436,7 +438,8 @@ int setup_arg_pages(struct linux_binprm kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); + vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt)); + mm->stack_vm = mm->total_vm; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { @@ -1306,7 +1309,7 @@ static void format_corename(char *corena /* UNIX time of coredump */ case 't': { struct timeval tv; - do_gettimeofday(&tv); + vx_gettimeofday(&tv); rc = snprintf(out_ptr, out_end - out_ptr, "%lu", tv.tv_sec); if (rc > out_end - out_ptr) diff -NurpP --minimal linux-2.6.19/fs/ext2/balloc.c linux-2.6.19-vs2.1.x-t1/fs/ext2/balloc.c --- linux-2.6.19/fs/ext2/balloc.c 2006-09-20 16:58:34 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/balloc.c 2006-11-08 04:57:50 +0100 @@ -16,6 +16,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -102,12 +104,14 @@ static int reserve_blocks(struct super_b { struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - unsigned free_blocks; - unsigned root_blocks; + unsigned long free_blocks; + unsigned long root_blocks; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); + DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks); + if (free_blocks < count) count = free_blocks; @@ -258,6 +262,7 @@ do_more: } error_return: brelse(bitmap_bh); + DLIMIT_FREE_BLOCK(inode, freed); release_blocks(sb, freed); DQUOT_FREE_BLOCK(inode, freed); } @@ -361,6 +366,10 @@ int ext2_new_block(struct inode *inode, *err = -ENOSPC; goto out_dquot; } + if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) { + *err = -ENOSPC; + goto out_dlimit; + } ext2_debug ("goal=%lu.\n", goal); @@ -508,6 +517,8 @@ got_block: *err = 0; out_release: group_release_blocks(sb, 
group_no, desc, gdp_bh, group_alloc); + DLIMIT_FREE_BLOCK(inode, es_alloc); +out_dlimit: release_blocks(sb, es_alloc); out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); diff -NurpP --minimal linux-2.6.19/fs/ext2/ext2.h linux-2.6.19-vs2.1.x-t1/fs/ext2/ext2.h --- linux-2.6.19/fs/ext2/ext2.h 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/ext2.h 2006-11-08 04:57:46 +0100 @@ -166,6 +166,7 @@ extern const struct file_operations ext2 extern const struct address_space_operations ext2_aops; extern const struct address_space_operations ext2_aops_xip; extern const struct address_space_operations ext2_nobh_aops; +extern int ext2_sync_flags(struct inode *inode); /* namei.c */ extern struct inode_operations ext2_dir_inode_operations; diff -NurpP --minimal linux-2.6.19/fs/ext2/file.c linux-2.6.19-vs2.1.x-t1/fs/ext2/file.c --- linux-2.6.19/fs/ext2/file.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/file.c 2006-11-08 04:57:51 +0100 @@ -54,6 +54,7 @@ const struct file_operations ext2_file_o .release = ext2_release_file, .fsync = ext2_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -85,4 +86,5 @@ struct inode_operations ext2_file_inode_ #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext2/ialloc.c linux-2.6.19-vs2.1.x-t1/fs/ext2/ialloc.c --- linux-2.6.19/fs/ext2/ialloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/ialloc.c 2006-11-08 04:57:50 +0100 @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -125,6 +127,7 @@ void ext2_free_inode (struct inode * ino ext2_xattr_delete_inode(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } es = EXT2_SB(sb)->s_es; @@ -464,6 +467,11 @@ struct inode *ext2_new_inode(struct inod if (!inode) 
return ERR_PTR(-ENOMEM); + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto fail_dlim; + } ei = EXT2_I(inode); sbi = EXT2_SB(sb); es = sbi->s_es; @@ -577,7 +585,8 @@ got: inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags = EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ @@ -625,12 +634,15 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); return ERR_PTR(err); fail: + DLIMIT_FREE_INODE(inode); +fail_dlim: make_bad_inode(inode); iput(inode); return ERR_PTR(err); diff -NurpP --minimal linux-2.6.19/fs/ext2/inode.c linux-2.6.19-vs2.1.x-t1/fs/ext2/inode.c --- linux-2.6.19/fs/ext2/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/inode.c 2006-11-30 18:53:18 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -913,7 +914,7 @@ void ext2_truncate (struct inode * inode return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return; ext2_discard_prealloc(inode); @@ -1042,25 +1043,70 @@ void ext2_set_inode_flags(struct inode * { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT2_SYNC_FL) inode->i_flags |= 
S_SYNC; if (flags & EXT2_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT2_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext2_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & ~(EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | + EXT2_BARRIER_FL | EXT2_NOATIME_FL | + EXT2_SYNC_FL | EXT2_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT2_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT2_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT2_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT2_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT2_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT2_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT2_DIRSYNC_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + + return 0; +} + void ext2_read_inode (struct inode * inode) { struct ext2_inode_info *ei = EXT2_I(inode); ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1071,12 +1117,17 @@ void ext2_read_inode (struct inode * ino goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= 
le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_tag)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1173,8 +1224,8 @@ static int ext2_update_inode(struct inod struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1209,6 +1260,9 @@ static int ext2_update_inode(struct inod raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_TAGGING_INTERN + raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1295,7 +1349,8 @@ int ext2_setattr(struct dentry *dentry, if (error) return error; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || + (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { error = DQUOT_TRANSFER(inode, iattr) ? 
-EDQUOT : 0; if (error) return error; diff -NurpP --minimal linux-2.6.19/fs/ext2/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/ext2/ioctl.c --- linux-2.6.19/fs/ext2/ioctl.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/ioctl.c 2006-11-08 04:57:52 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -52,7 +54,9 @@ int ext2_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT2_APPEND_FL | + EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } @@ -71,7 +75,8 @@ int ext2_ioctl (struct inode * inode, st case EXT2_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *) arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.19/fs/ext2/namei.c linux-2.6.19-vs2.1.x-t1/fs/ext2/namei.c --- linux-2.6.19/fs/ext2/namei.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/namei.c 2006-11-30 18:53:18 +0100 @@ -31,6 +31,7 @@ */ #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -66,6 +67,7 @@ static struct dentry *ext2_lookup(struct inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + dx_propagate_tag(nd, inode); } return d_splice_alias(inode, dentry); } @@ -391,6 +393,7 @@ struct inode_operations ext2_dir_inode_o #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; struct 
inode_operations ext2_special_inode_operations = { @@ -402,4 +405,5 @@ struct inode_operations ext2_special_ino #endif .setattr = ext2_setattr, .permission = ext2_permission, + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext2/super.c linux-2.6.19-vs2.1.x-t1/fs/ext2/super.c --- linux-2.6.19/fs/ext2/super.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/super.c 2006-11-08 04:57:51 +0100 @@ -227,8 +227,8 @@ static int ext2_show_options(struct seq_ } #ifdef CONFIG_QUOTA -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); -static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off); +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off); #endif static struct super_operations ext2_sops = { @@ -324,7 +324,7 @@ enum { Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -352,6 +352,10 @@ static match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_xip, "xip"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_grpquota, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, @@ -420,6 +424,20 @@ static int parse_options (char * options case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + set_opt (sbi->s_mount_opt, TAGGED); + break; + case Opt_notag: + clear_opt (sbi->s_mount_opt, TAGGED); + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + set_opt (sbi->s_mount_opt, TAGGED); + break; 
+#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -728,6 +746,8 @@ static int ext2_fill_super(struct super_ if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1036,6 +1056,15 @@ static int ext2_remount (struct super_bl goto restore_opts; } + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("EXT2-fs: %s: tagging not permitted on remount.\n", + sb->s_id); + /* leave through restore_opts like the error path above */ + err = -EINVAL; + goto restore_opts; + } + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -1148,10 +1177,11 @@ static int ext2_get_sb(struct file_syste * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -1192,10 +1222,11 @@ static ssize_t ext2_quota_read(struct su } /* Write to quotafile */ -static ssize_t ext2_quota_write(struct super_block *sb, int type, +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); diff -NurpP --minimal
linux-2.6.19/fs/ext2/symlink.c linux-2.6.19-vs2.1.x-t1/fs/ext2/symlink.c --- linux-2.6.19/fs/ext2/symlink.c 2005-08-29 22:25:30 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/symlink.c 2006-11-08 04:57:46 +0100 @@ -38,6 +38,7 @@ struct inode_operations ext2_symlink_ino .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; struct inode_operations ext2_fast_symlink_inode_operations = { @@ -49,4 +50,5 @@ struct inode_operations ext2_fast_symlin .listxattr = ext2_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext2_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext2/xattr.c linux-2.6.19-vs2.1.x-t1/fs/ext2/xattr.c --- linux-2.6.19/fs/ext2/xattr.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext2/xattr.c 2006-11-08 04:57:50 +0100 @@ -60,6 +60,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -644,8 +645,15 @@ ext2_xattr_set2(struct inode *inode, str the inode.
*/ ea_bdebug(new_bh, "reusing block"); + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) { + /* unlock new_bh like the quota failure path below */ + unlock_buffer(new_bh); + goto cleanup; + } error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) { + DLIMIT_FREE_BLOCK(inode, 1); unlock_buffer(new_bh); goto cleanup; } @@ -739,6 +747,7 @@ ext2_xattr_set2(struct inode *inode, str le32_to_cpu(HDR(old_bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", @@ -803,6 +812,7 @@ ext2_xattr_delete_inode(struct inode *in mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); } EXT2_I(inode)->i_file_acl = 0; diff -NurpP --minimal linux-2.6.19/fs/ext3/balloc.c linux-2.6.19-vs2.1.x-t1/fs/ext3/balloc.c --- linux-2.6.19/fs/ext3/balloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/balloc.c 2006-11-08 19:46:02 +0100 @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -613,8 +615,10 @@ void ext3_free_blocks(handle_t *handle, return; } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) + if (dquot_freed_blocks) { + DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + } return; } @@ -1349,18 +1353,33 @@ out: * * Check if filesystem has at least 1 free block available for allocation.
*/ -static int ext3_has_free_blocks(struct ext3_sb_info *sbi) +static int ext3_has_free_blocks(struct super_block *sb) { + struct ext3_sb_info *sbi = EXT3_SB(sb); ext3_fsblk_t free_blocks, root_blocks; + int cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%lu, root=%lu", + sb, free_blocks, root_blocks); + + DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks); + + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 
0 : 1); } /** @@ -1377,7 +1396,7 @@ static int ext3_has_free_blocks(struct e */ int ext3_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) + if (!ext3_has_free_blocks(sb) || (*retries)++ > 3) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); @@ -1440,6 +1459,8 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h *errp = -EDQUOT; return 0; } + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto out_dlimit; sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; @@ -1456,7 +1477,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext3_has_free_blocks(sbi)) { + if (!ext3_has_free_blocks(sb)) { *errp = -ENOSPC; goto out; } @@ -1650,6 +1671,9 @@ allocated: io_error: *errp = -EIO; out: + if (!performed_allocation) + DLIMIT_FREE_BLOCK(inode, 1); +out_dlimit: if (fatal) { *errp = fatal; ext3_std_error(sb, fatal); diff -NurpP --minimal linux-2.6.19/fs/ext3/file.c linux-2.6.19-vs2.1.x-t1/fs/ext3/file.c --- linux-2.6.19/fs/ext3/file.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/file.c 2006-11-08 04:57:51 +0100 @@ -121,6 +121,7 @@ const struct file_operations ext3_file_o .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -135,5 +136,6 @@ struct inode_operations ext3_file_inode_ .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext3/ialloc.c linux-2.6.19-vs2.1.x-t1/fs/ext3/ialloc.c --- linux-2.6.19/fs/ext3/ialloc.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/ialloc.c 2006-11-08 04:57:50 +0100 @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -127,6 +129,7 @@ void ext3_free_inode 
(handle_t *handle, ext3_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); is_directory = S_ISDIR(inode->i_mode); @@ -445,6 +448,12 @@ struct inode *ext3_new_inode(handle_t *h inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_dlimit; + } ei = EXT3_I(inode); sbi = EXT3_SB(sb); @@ -566,7 +575,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ @@ -621,6 +631,8 @@ got: fail: ext3_std_error(sb, err); out: + DLIMIT_FREE_INODE(inode); +out_dlimit: iput(inode); ret = ERR_PTR(err); really_out: @@ -632,6 +644,7 @@ fail_free_drop: fail_drop: DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff -NurpP --minimal linux-2.6.19/fs/ext3/inode.c linux-2.6.19-vs2.1.x-t1/fs/ext3/inode.c --- linux-2.6.19/fs/ext3/inode.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/inode.c 2006-11-30 19:02:16 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -2246,7 +2247,7 @@ void ext3_truncate(struct inode *inode) return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return; /* @@ -2568,19 +2569,77 @@ void ext3_set_inode_flags(struct inode * { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & EXT3_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & 
EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; + if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } +int ext3_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & ~(EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | + EXT3_BARRIER_FL | EXT3_NOATIME_FL | + EXT3_SYNC_FL | EXT3_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= EXT3_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= EXT3_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= EXT3_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= EXT3_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= EXT3_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= EXT3_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= EXT3_DIRSYNC_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext3_journal_stop(handle); + } + return err; +} + void ext3_read_inode(struct inode * inode) { struct ext3_iloc iloc; @@ -2588,6 +2647,8 @@ void ext3_read_inode(struct inode * inod struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2600,12 +2661,17 @@ void 
ext3_read_inode(struct inode * inod bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_tag)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2729,6 +2795,8 @@ static int ext3_do_update_inode(handle_t struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2738,29 +2806,32 @@ static int ext3_do_update_inode(handle_t raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_TAGGING_INTERN + raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2913,7 +2984,8 @@ int ext3_setattr(struct dentry *dentry, return error; if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { handle_t *handle; /* (user+group)*(old+new) structure, inode write (sb, @@ -2935,6 +3007,8 @@ int ext3_setattr(struct dentry *dentry, inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); } diff -NurpP --minimal linux-2.6.19/fs/ext3/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/ext3/ioctl.c --- linux-2.6.19/fs/ext3/ioctl.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/ioctl.c 2006-11-30 19:09:12 +0100 @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include int ext3_ioctl (struct inode * inode, struct file * filp, 
unsigned int cmd, @@ -37,7 +39,8 @@ int ext3_ioctl (struct inode * inode, st unsigned int oldflags; unsigned int jflag; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -61,7 +64,9 @@ int ext3_ioctl (struct inode * inode, st * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((oldflags & EXT3_IMMUTABLE_FL) || + ((flags ^ oldflags) & (EXT3_APPEND_FL | + EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -123,7 +128,8 @@ flags_err: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; @@ -177,7 +183,8 @@ flags_err: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -212,7 +219,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(n_blocks_count, (__u32 __user *)arg)) @@ -233,7 +241,8 @@ flags_err: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, @@ -248,6 +257,38 @@ flags_err: return err; } +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_TAGGING_NONE) + case EXT3_IOC_SETTAG: { + handle_t *handle; + struct ext3_iloc iloc; + int tag; + int err; + + /* fixme: if stealth, return -ENOTTY */ + if 
(!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (!(inode->i_sb->s_flags & MS_TAGGED)) + return -ENOSYS; + if (get_user(tag, (int __user *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) { + inode->i_tag = (tag & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + } + /* stop the journal handle on every path, including reserve failure */ + ext3_journal_stop(handle); + return err; + } +#endif default: return -ENOTTY; diff -NurpP --minimal linux-2.6.19/fs/ext3/namei.c linux-2.6.19-vs2.1.x-t1/fs/ext3/namei.c --- linux-2.6.19/fs/ext3/namei.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/namei.c 2006-11-30 18:53:18 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include "namei.h" #include "xattr.h" @@ -1010,6 +1011,7 @@ static struct dentry *ext3_lookup(struct if (!inode) return ERR_PTR(-EACCES); + dx_propagate_tag(nd, inode); } return d_splice_alias(inode, dentry); } @@ -2383,6 +2385,7 @@ struct inode_operations ext3_dir_inode_o .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_special_inode_operations = { @@ -2394,4 +2397,5 @@ struct inode_operations ext3_special_ino .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext3/super.c linux-2.6.19-vs2.1.x-t1/fs/ext3/super.c --- linux-2.6.19/fs/ext3/super.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/super.c 2006-11-08 04:57:51 +0100 @@ -605,12 +605,12 @@ static int ext3_write_dquot(struct dquot static int ext3_acquire_dquot(struct dquot *dquot); static int ext3_release_dquot(struct dquot *dquot); static int ext3_mark_dquot_dirty(struct dquot *dquot); -static int ext3_write_info(struct super_block *sb, int type); -static int 
ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); -static int ext3_quota_on_mount(struct super_block *sb, int type); -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static int ext3_write_info(struct dqhash *hash, int type); +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path); +static int ext3_quota_on_mount(struct dqhash *hash, int type); +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off); -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext3_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off); static struct dquot_operations ext3_quota_operations = { @@ -677,7 +677,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -727,6 +727,10 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -820,6 +824,20 @@ static int parse_options (char *options, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + set_opt (sbi->s_mount_opt, TAGGED); + break; + case Opt_notag: + clear_opt (sbi->s_mount_opt, TAGGED); + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + set_opt (sbi->s_mount_opt, TAGGED); + break; +#endif case Opt_nocheck: clear_opt (sbi->s_mount_opt, CHECK); break; @@ -938,7 +956,7 @@ static int parse_options (char *options, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { 
printk(KERN_ERR "EXT3-fs: Cannot change journalled " "quota options when quota turned on.\n"); @@ -976,7 +994,7 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { printk(KERN_ERR "EXT3-fs: Cannot change " "journalled quota options when " "quota turned on.\n"); @@ -1004,7 +1022,7 @@ clear_qf_name: set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb)) { + if (dqh_any_quota_enabled(sb->s_dqh)) { printk(KERN_ERR "EXT3-fs: Cannot change quota " "options when quota turned on.\n"); return 0; @@ -1284,7 +1302,7 @@ static void ext3_orphan_cleanup (struct /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { if (EXT3_SB(sb)->s_qf_names[i]) { - int ret = ext3_quota_on_mount(sb, i); + int ret = ext3_quota_on_mount(sb->s_dqh, i); if (ret < 0) printk(KERN_ERR "EXT3-fs: Cannot turn on journalled " @@ -1334,8 +1352,8 @@ static void ext3_orphan_cleanup (struct #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - vfs_quota_off(sb, i); + if (dqh_dqopt(sb->s_dqh)->files[i]) + vfs_quota_off(sb->s_dqh, i); } #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -1482,6 +1500,9 @@ static int ext3_fill_super (struct super NULL, 0)) goto failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); @@ -1687,8 +1708,8 @@ static int ext3_fill_super (struct super sb->s_export_op = &ext3_export_ops; sb->s_xattr = ext3_xattr_handlers; #ifdef CONFIG_QUOTA - sb->s_qcop = &ext3_qctl_operations; - sb->dq_op = &ext3_quota_operations; + sb->s_dqh->dqh_qop = &ext3_quota_operations; + sb->s_dqh->dqh_qcop = &ext3_qctl_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ @@ -2297,6 +2318,12 @@ static int ext3_remount (struct super_bl if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("EXT3-fs: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); @@ -2450,7 +2477,7 @@ static int ext3_statfs (struct dentry * static inline struct inode *dquot_to_inode(struct dquot *dquot) { - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; + return dqh_dqopt(dquot->dq_dqh)->files[dquot->dq_type]; } static int ext3_dquot_initialize(struct inode *inode, int type) @@ -2493,7 +2520,7 @@ static int ext3_write_dquot(struct dquot inode = dquot_to_inode(dquot); handle = ext3_journal_start(inode, - EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -2509,7 +2536,7 @@ static int ext3_acquire_dquot(struct dqu handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -2525,7 +2552,7 @@ static int ext3_release_dquot(struct dqu handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + EXT3_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (IS_ERR(handle)) return 
PTR_ERR(handle); ret = dquot_release(dquot); @@ -2538,8 +2565,8 @@ static int ext3_release_dquot(struct dqu static int ext3_mark_dquot_dirty(struct dquot *dquot) { /* Are we journalling quotas? */ - if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] || + EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext3_write_dquot(dquot); } else { @@ -2547,8 +2574,9 @@ static int ext3_mark_dquot_dirty(struct } } -static int ext3_write_info(struct super_block *sb, int type) +static int ext3_write_info(struct dqhash *hash, int type) { + struct super_block *sb = hash->dqh_sb; int ret, err; handle_t *handle; @@ -2556,7 +2584,7 @@ static int ext3_write_info(struct super_ handle = ext3_journal_start(sb->s_root->d_inode, 2); if (IS_ERR(handle)) return PTR_ERR(handle); - ret = dquot_commit_info(sb, type); + ret = dquot_commit_info(hash, type); err = ext3_journal_stop(handle); if (!ret) ret = err; @@ -2567,18 +2595,20 @@ static int ext3_write_info(struct super_ * Turn on quotas during mount time - we need to find * the quota file and such... */ -static int ext3_quota_on_mount(struct super_block *sb, int type) +static int ext3_quota_on_mount(struct dqhash *hash, int type) { - return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], - EXT3_SB(sb)->s_jquota_fmt, type); + return vfs_quota_on_mount(hash, + EXT3_SB(hash->dqh_sb)->s_qf_names[type], + EXT3_SB(hash->dqh_sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int ext3_quota_on(struct super_block *sb, int type, int format_id, +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path) { + struct super_block *sb = hash->dqh_sb; int err; struct nameidata nd; @@ -2587,7 +2617,7 @@ static int ext3_quota_on(struct super_bl /* Not journalling quota? 
*/ if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; @@ -2602,17 +2632,18 @@ static int ext3_quota_on(struct super_bl "EXT3-fs: Quota file not on filesystem root. " "Journalled quota will not work.\n"); path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -2647,10 +2678,11 @@ static ssize_t ext3_quota_read(struct su /* Write to quotafile (we know the transaction is already started and has * enough credits) */ -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext3_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); diff -NurpP --minimal linux-2.6.19/fs/ext3/symlink.c linux-2.6.19-vs2.1.x-t1/fs/ext3/symlink.c --- linux-2.6.19/fs/ext3/symlink.c 2005-08-29 22:25:30 +0200 +++ 
linux-2.6.19-vs2.1.x-t1/fs/ext3/symlink.c 2006-11-08 04:57:46 +0100 @@ -40,6 +40,7 @@ struct inode_operations ext3_symlink_ino .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; struct inode_operations ext3_fast_symlink_inode_operations = { @@ -51,4 +52,5 @@ struct inode_operations ext3_fast_symlin .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif + .sync_flags = ext3_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ext3/xattr.c linux-2.6.19-vs2.1.x-t1/fs/ext3/xattr.c --- linux-2.6.19/fs/ext3/xattr.c 2006-11-30 21:19:19 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ext3/xattr.c 2006-11-08 04:57:50 +0100 @@ -58,6 +58,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -495,6 +496,7 @@ ext3_xattr_release_block(handle_t *handl ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1); unlock_buffer(bh); ea_bdebug(bh, "refcount now=%d; releasing", @@ -763,11 +765,14 @@ inserted: if (new_bh == bs->bh) ea_bdebug(new_bh, "keeping"); else { + error = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, 1)) + goto cleanup; /* The old block is released after updating the inode. 
*/ error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; + goto cleanup_dlimit; error = ext3_journal_get_write_access(handle, new_bh); if (error) @@ -844,6 +849,8 @@ cleanup: cleanup_dquot: DQUOT_FREE_BLOCK(inode, 1); +cleanup_dlimit: + DLIMIT_FREE_BLOCK(inode, 1); goto cleanup; bad_block: diff -NurpP --minimal linux-2.6.19/fs/fcntl.c linux-2.6.19-vs2.1.x-t1/fs/fcntl.c --- linux-2.6.19/fs/fcntl.c 2006-11-30 21:19:23 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/fcntl.c 2006-11-08 04:57:48 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,8 @@ repeat: error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -125,6 +128,7 @@ static int dupfd(struct file *file, unsi FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -177,6 +181,9 @@ asmlinkage long sys_dup2(unsigned int ol if (tofree) filp_close(tofree, files); + else + vx_openfd_inc(newfd); /* fd was unused */ + err = newfd; out: return err; diff -NurpP --minimal linux-2.6.19/fs/file_table.c linux-2.6.19-vs2.1.x-t1/fs/file_table.c --- linux-2.6.19/fs/file_table.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/file_table.c 2006-11-08 04:57:48 +0100 @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -120,6 +122,8 @@ struct file *get_empty_filp(void) f->f_gid = tsk->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ + f->f_xid = vx_current_xid(); + vx_files_inc(f); return f; over: @@ -175,6 +179,8 @@ void fastcall __fput(struct file *file) if (file->f_mode & FMODE_WRITE) put_write_access(inode); put_pid(file->f_owner.pid); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; @@ -240,6 +246,8 @@ void put_filp(struct file *file) { if 
(atomic_dec_and_test(&file->f_count)) { security_file_free(file); + vx_files_dec(file); + file->f_xid = 0; file_kill(file); file_free(file); } diff -NurpP --minimal linux-2.6.19/fs/gfs2/log.c linux-2.6.19-vs2.1.x-t1/fs/gfs2/log.c --- linux-2.6.19/fs/gfs2/log.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/gfs2/log.c 2006-11-08 22:48:20 +0100 @@ -319,7 +319,7 @@ static u64 log_bmap(struct gfs2_sbd *sdp bh_map.b_size = 1 << inode->i_blkbits; error = gfs2_block_map(inode, lbn, 0, &bh_map); if (error || !bh_map.b_blocknr) - printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn); + printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, (unsigned long long)bh_map.b_blocknr, lbn); gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); return bh_map.b_blocknr; diff -NurpP --minimal linux-2.6.19/fs/hfsplus/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/hfsplus/ioctl.c --- linux-2.6.19/fs/hfsplus/ioctl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/hfsplus/ioctl.c 2006-11-08 04:57:52 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" @@ -35,7 +36,8 @@ int hfsplus_ioctl(struct inode *inode, s flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) diff -NurpP --minimal linux-2.6.19/fs/inode.c linux-2.6.19-vs2.1.x-t1/fs/inode.c --- linux-2.6.19/fs/inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/inode.c 2006-11-08 04:57:53 +0100 @@ -115,6 +115,9 @@ static struct inode *alloc_inode(struct struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + + /* essential because of inode slab reuse */ + inode->i_tag = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -126,6 +129,9 @@ static struct inode *alloc_inode(struct 
inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; +#ifdef CONFIG_QUOTACTL + inode->i_dqh = dqhget(sb->s_dqh); +#endif #ifdef CONFIG_QUOTA memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); #endif @@ -172,6 +178,8 @@ void destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); + if (dqhash_valid(inode->i_dqh)) + dqhput(inode->i_dqh); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -233,6 +241,8 @@ void __iget(struct inode * inode) inodes_stat.nr_unused--; } +EXPORT_SYMBOL_GPL(__iget); + /** * clear_inode - clear an inode * @inode: inode to clear @@ -1245,12 +1255,13 @@ EXPORT_SYMBOL(inode_needs_sync); /* Function back in dquot.c */ int remove_inode_dquot_ref(struct inode *, int, struct list_head *); -void remove_dquot_ref(struct super_block *sb, int type, +void remove_dquot_ref(struct dqhash *hash, int type, struct list_head *tofree_head) { struct inode *inode; + struct super_block *sb = hash->dqh_sb; - if (!sb->dq_op) + if (!hash->dqh_qop) return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ diff -NurpP --minimal linux-2.6.19/fs/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/ioctl.c --- linux-2.6.19/fs/ioctl.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/ioctl.c 2006-11-30 18:53:18 +0100 @@ -12,10 +12,19 @@ #include #include #include +#include +#include +#include #include #include + +#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static long do_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -146,6 +155,48 @@ int vfs_ioctl(struct file *filp, unsigne else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_TAGGING_NONE + case FIOC_GETTAG: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_tag, (int 
__user *) arg); + break; + } + case FIOC_SETTAG: { + struct inode *inode = filp->f_dentry->d_inode; + int tag; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGGED)) + break; + error = -EFAULT; + if (get_user(tag, (int __user *) arg)) + break; + error = 0; + inode->i_tag = (tag & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: if (S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff -NurpP --minimal linux-2.6.19/fs/ioprio.c linux-2.6.19-vs2.1.x-t1/fs/ioprio.c --- linux-2.6.19/fs/ioprio.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ioprio.c 2006-11-30 18:53:18 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include static int set_task_ioprio(struct task_struct *task, int ioprio) { @@ -109,7 +110,7 @@ asmlinkage long sys_ioprio_set(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; @@ -197,7 +198,7 @@ asmlinkage long sys_ioprio_get(int which if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) break; diff -NurpP --minimal linux-2.6.19/fs/jfs/acl.c linux-2.6.19-vs2.1.x-t1/fs/jfs/acl.c --- linux-2.6.19/fs/jfs/acl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/acl.c 2006-11-08 04:57:46 +0100 @@ -232,7 +232,8 @@ int jfs_setattr(struct dentry *dentry, s return rc; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != 
inode->i_gid) || + (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { if (DQUOT_TRANSFER(inode, iattr)) return -EDQUOT; } diff -NurpP --minimal linux-2.6.19/fs/jfs/file.c linux-2.6.19-vs2.1.x-t1/fs/jfs/file.c --- linux-2.6.19/fs/jfs/file.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/file.c 2006-11-08 21:52:37 +0100 @@ -98,6 +98,7 @@ struct inode_operations jfs_file_inode_o .setattr = jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_file_operations = { @@ -109,6 +110,7 @@ const struct file_operations jfs_file_op .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .fsync = jfs_fsync, diff -NurpP --minimal linux-2.6.19/fs/jfs/inode.c linux-2.6.19-vs2.1.x-t1/fs/jfs/inode.c --- linux-2.6.19/fs/jfs/inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/inode.c 2006-11-08 04:57:50 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -144,6 +145,7 @@ void jfs_delete_inode(struct inode *inod DQUOT_INIT(inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); + DLIMIT_FREE_INODE(inode); } clear_inode(inode); diff -NurpP --minimal linux-2.6.19/fs/jfs/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/jfs/ioctl.c --- linux-2.6.19/fs/jfs/ioctl.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/ioctl.c 2006-11-08 04:57:51 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,8 @@ int jfs_ioctl(struct inode * inode, stru case JFS_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) @@ -84,8 +86,8 @@ int jfs_ioctl(struct inode * inode, stru * 
the relevant capability. */ if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + ((flags ^ oldflags) & (JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_dinode.h linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_dinode.h --- linux-2.6.19/fs/jfs/jfs_dinode.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_dinode.h 2006-11-08 04:57:51 +0100 @@ -162,9 +162,12 @@ struct dinode { #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ -#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define JFS_IUNLINK_FL 0x08000000 /* Immutable unlink */ + +#define JFS_FL_USER_VISIBLE 0x0FF80000 #define JFS_FL_USER_MODIFIABLE 0x03F80000 -#define JFS_FL_INHERIT 0x03C80000 +#define JFS_FL_INHERIT 0x0BC80000 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ #define JFS_IOC_GETFLAGS _IOR('f', 1, long) diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_dtree.c linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_dtree.c --- linux-2.6.19/fs/jfs/jfs_dtree.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_dtree.c 2006-11-08 04:57:50 +0100 @@ -102,6 +102,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -383,10 +384,10 @@ static u32 add_index(tid_t tid, struct i */ if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) goto clean_up; - if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); - goto clean_up; - } + if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage)) + goto clean_up_dquot; + if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) + goto clean_up_dlimit; /* * Save the table, we're going to overwrite it with the @@ -479,6 +480,12 @@ static u32 add_index(tid_t tid, struct i return index; + clean_up_dlimit: + DLIMIT_FREE_BLOCK(ip, 
sbi->nbperpage); + + clean_up_dquot: + DQUOT_FREE_BLOCK(ip, sbi->nbperpage); + clean_up: jfs_ip->next_index--; @@ -952,6 +959,7 @@ static int dtSplitUp(tid_t tid, struct tlock *tlck; struct lv *lv; int quota_allocation = 0; + int dlimit_allocation = 0; /* get split page */ smp = split->mp; @@ -1036,6 +1044,12 @@ static int dtSplitUp(tid_t tid, } quota_allocation += n; + if (DLIMIT_ALLOC_BLOCK(ip, n)) { + rc = -ENOSPC; + goto extendOut; + } + dlimit_allocation += n; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, (s64) n, &nxaddr))) goto extendOut; @@ -1309,6 +1323,9 @@ static int dtSplitUp(tid_t tid, freeKeyName: kfree(key.name); + /* Rollback dlimit allocation */ + if (rc && dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation */ if (rc && quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1376,6 +1393,12 @@ static int dtSplitPage(tid_t tid, struct release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1926,6 +1949,12 @@ static int dtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } BT_MARK_DIRTY(rmp, ip); /* @@ -2292,6 +2321,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&fp->header.self); + /* Free dlimit allocation. */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation. 
*/ DQUOT_FREE_BLOCK(ip, xlen); @@ -2368,6 +2399,8 @@ static int dtDeleteUp(tid_t tid, struct xlen = lengthPXD(&p->header.self); + /* Free dlimit allocation */ + DLIMIT_FREE_BLOCK(ip, xlen); /* Free quota allocation */ DQUOT_FREE_BLOCK(ip, xlen); diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_extent.c linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_extent.c --- linux-2.6.19/fs/jfs/jfs_extent.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_extent.c 2006-11-08 04:57:50 +0100 @@ -18,6 +18,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_superblock.h" @@ -147,6 +148,14 @@ extAlloc(struct inode *ip, s64 xlen, s64 return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } + /* determine the value of the extent flag */ xflag = abnr ? XAD_NOTRECORDED : 0; @@ -164,6 +173,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 */ if (rc) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); @@ -261,6 +271,13 @@ int extRealloc(struct inode *ip, s64 nxl mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } + /* Allocate blocks to dlimit. 
*/ + if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) { + DQUOT_FREE_BLOCK(ip, nxlen); + dbFree(ip, nxaddr, (s64) nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return -ENOSPC; + } delta = nxlen - xlen; @@ -297,6 +314,7 @@ int extRealloc(struct inode *ip, s64 nxl /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } @@ -308,6 +326,7 @@ int extRealloc(struct inode *ip, s64 nxl */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); + DLIMIT_FREE_BLOCK(ip, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); goto exit; } diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_filsys.h linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_filsys.h --- linux-2.6.19/fs/jfs/jfs_filsys.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_filsys.h 2006-11-08 04:57:46 +0100 @@ -84,6 +84,7 @@ #define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ /* directory entries */ +#define JFS_TAGGED 0x00800000 /* Context Tagging */ /* * buffer cache configuration diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_imap.c linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_imap.c --- linux-2.6.19/fs/jfs/jfs_imap.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_imap.c 2006-11-30 18:53:18 +0100 @@ -45,6 +45,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" @@ -3075,6 +3076,8 @@ static int copy_from_dinode(struct dinod { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); @@ -3094,14 +3097,18 @@ static int copy_from_dinode(struct dinod } ip->i_nlink = le32_to_cpu(dip->di_nlink); - jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0); + + jfs_ip->saved_uid = 
INOTAG_UID(DX_TAG(ip), uid, gid); if (sbi->uid == -1) ip->i_uid = jfs_ip->saved_uid; else { ip->i_uid = sbi->uid; } - jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); + jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid); if (sbi->gid == -1) ip->i_gid = jfs_ip->saved_gid; else { @@ -3166,14 +3173,12 @@ static void copy_to_dinode(struct dinode dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - if (sbi->uid == -1) - dip->di_uid = cpu_to_le32(ip->i_uid); - else - dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); - if (sbi->gid == -1) - dip->di_gid = cpu_to_le32(ip->i_gid); - else - dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); + + dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip), + (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag)); + dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip), + (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag)); + /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_inode.c linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_inode.c --- linux-2.6.19/fs/jfs/jfs_inode.c 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_inode.c 2006-11-08 04:57:51 +0100 @@ -18,6 +18,8 @@ #include #include +#include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -30,19 +32,59 @@ void jfs_set_inode_flags(struct inode *i { unsigned int flags = JFS_IP(inode)->mode2; - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); + inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); if (flags & JFS_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & JFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & JFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + + if (flags & JFS_SYNC_FL) + inode->i_flags |= S_SYNC; if (flags & JFS_APPEND_FL) inode->i_flags |= S_APPEND; if (flags & JFS_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & JFS_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; - if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; +} + +int jfs_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + + oldflags = JFS_IP(inode)->mode2; + newflags = oldflags & ~(JFS_APPEND_FL | + JFS_IMMUTABLE_FL | JFS_IUNLINK_FL | + JFS_BARRIER_FL | JFS_NOATIME_FL | + JFS_SYNC_FL | JFS_DIRSYNC_FL); + + if (IS_APPEND(inode)) + newflags |= JFS_APPEND_FL; + if (IS_IMMUTABLE(inode)) + newflags |= JFS_IMMUTABLE_FL; + if (IS_IUNLINK(inode)) + newflags |= JFS_IUNLINK_FL; + if (IS_BARRIER(inode)) + newflags |= JFS_BARRIER_FL; + + /* we do not want to copy superblock flags */ + if (inode->i_flags & S_NOATIME) + newflags |= JFS_NOATIME_FL; + if (inode->i_flags & S_SYNC) + newflags |= JFS_SYNC_FL; + if (inode->i_flags & S_DIRSYNC) + newflags |= JFS_DIRSYNC_FL; + + if (oldflags ^ newflags) { + JFS_IP(inode)->mode2 = newflags; + inode->i_ctime = CURRENT_TIME; + 
mark_inode_dirty(inode); + } + return 0; } /* @@ -90,10 +132,17 @@ struct inode *ialloc(struct inode *paren jfs_inode->saved_uid = inode->i_uid; jfs_inode->saved_gid = inode->i_gid; + inode->i_tag = dx_current_fstag(sb); + if (DLIMIT_ALLOC_INODE(inode)) { + iput(inode); + return NULL; + } + /* * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { + DLIMIT_FREE_INODE(inode); DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_inode.h linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_inode.h --- linux-2.6.19/fs/jfs/jfs_inode.h 2006-11-30 21:19:25 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_inode.h 2006-11-08 04:57:51 +0100 @@ -31,6 +31,7 @@ extern void jfs_truncate(struct inode *) extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern int jfs_sync_flags(struct inode *); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); diff -NurpP --minimal linux-2.6.19/fs/jfs/jfs_xtree.c linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_xtree.c --- linux-2.6.19/fs/jfs/jfs_xtree.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/jfs_xtree.c 2006-11-08 04:57:50 +0100 @@ -21,6 +21,7 @@ #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -841,7 +842,12 @@ int xtInsert(tid_t tid, /* transaction hint = 0; if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) goto out; + if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) { + DQUOT_FREE_BLOCK(ip, xlen); + goto out; + } if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); goto out; } @@ -871,6 +877,7 @@ int xtInsert(tid_t tid, /* transaction /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); + DLIMIT_FREE_BLOCK(ip, xlen); DQUOT_FREE_BLOCK(ip, xlen); } return rc; @@ -1231,6 
+1238,7 @@ xtSplitPage(tid_t tid, struct inode *ip, struct tlock *tlck; struct xtlock *sxtlck = NULL, *rxtlck = NULL; int quota_allocation = 0; + int dlimit_allocation = 0; smp = split->mp; sp = XT_PAGE(ip, smp); @@ -1250,6 +1258,13 @@ xtSplitPage(tid_t tid, struct inode *ip, quota_allocation += lengthPXD(pxd); + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = -ENOSPC; + goto clean_up; + } + dlimit_allocation += lengthPXD(pxd); + /* * allocate the new right page for the split */ @@ -1451,6 +1466,9 @@ xtSplitPage(tid_t tid, struct inode *ip, clean_up: + /* Rollback dlimit allocation. */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(ip, dlimit_allocation); /* Rollback quota allocation. */ if (quota_allocation) DQUOT_FREE_BLOCK(ip, quota_allocation); @@ -1515,6 +1533,12 @@ xtSplitRoot(tid_t tid, release_metapage(rmp); return -EDQUOT; } + /* Allocate blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + DQUOT_FREE_BLOCK(ip, lengthPXD(pxd)); + release_metapage(rmp); + return -ENOSPC; + } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3941,6 +3965,8 @@ s64 xtTruncate(tid_t tid, struct inode * else ip->i_size = newsize; + /* update dlimit allocation to reflect freed blocks */ + DLIMIT_FREE_BLOCK(ip, nfreed); /* update quota allocation to reflect freed blocks */ DQUOT_FREE_BLOCK(ip, nfreed); diff -NurpP --minimal linux-2.6.19/fs/jfs/namei.c linux-2.6.19-vs2.1.x-t1/fs/jfs/namei.c --- linux-2.6.19/fs/jfs/namei.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/namei.c 2006-11-30 18:53:18 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -1461,6 +1462,7 @@ static struct dentry *jfs_lookup(struct return ERR_PTR(-EACCES); } + dx_propagate_tag(nd, ip); dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) @@ -1513,6 +1515,7 @@ struct inode_operations jfs_dir_inode_op .setattr = 
jfs_setattr, .permission = jfs_permission, #endif + .sync_flags = jfs_sync_flags, }; const struct file_operations jfs_dir_operations = { diff -NurpP --minimal linux-2.6.19/fs/jfs/super.c linux-2.6.19-vs2.1.x-t1/fs/jfs/super.c --- linux-2.6.19/fs/jfs/super.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/super.c 2006-11-08 04:57:51 +0100 @@ -194,7 +194,8 @@ static void jfs_put_super(struct super_b enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_tag, Opt_notag, Opt_tagid }; static match_table_t tokens = { @@ -204,6 +205,10 @@ static match_table_t tokens = { {Opt_resize, "resize=%u"}, {Opt_resize_nosize, "resize"}, {Opt_errors, "errors=%s"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, @@ -338,6 +343,20 @@ static int parse_options(char *options, } break; } +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + *flag |= JFS_TAGGED; + break; + case Opt_notag: + *flag &= ~JFS_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + *flag |= JFS_TAGGED; + break; +#endif default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -368,6 +387,13 @@ static int jfs_remount(struct super_bloc if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + + if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) { + printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n", + sb->s_id); + return -EINVAL; + } + if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR @@ -439,6 +465,9 @@ static int jfs_fill_super(struct super_b #ifdef CONFIG_JFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif + /* map mount option tagxid */ + if (sbi->flag & JFS_TAGGED) + sb->s_flags |= 
MS_TAGGED; if (newLVSize) { printk(KERN_ERR "resize option for remount only\n"); @@ -615,10 +644,11 @@ static int jfs_show_options(struct seq_f * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, +static ssize_t jfs_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -660,10 +690,11 @@ static ssize_t jfs_quota_read(struct sup } /* Write to quotafile */ -static ssize_t jfs_quota_write(struct super_block *sb, int type, +static ssize_t jfs_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); diff -NurpP --minimal linux-2.6.19/fs/jfs/xattr.c linux-2.6.19-vs2.1.x-t1/fs/jfs/xattr.c --- linux-2.6.19/fs/jfs/xattr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/jfs/xattr.c 2006-11-08 21:52:09 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -263,9 +264,16 @@ static int ea_write(struct inode *ip, st if (DQUOT_ALLOC_BLOCK(ip, nblocks)) { return -EDQUOT; } + /* Allocate new blocks to dlimit. */ + if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) { + DQUOT_FREE_BLOCK(ip, nblocks); + return -ENOSPC; + } rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { + /*Rollback dlimit allocation. 
*/ + DLIMIT_FREE_BLOCK(ip, nblocks); /*Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); return rc; @@ -332,6 +340,8 @@ static int ea_write(struct inode *ip, st failed: /* Rollback quota allocation. */ + DLIMIT_FREE_BLOCK(ip, nblocks); + /* Rollback quota allocation. */ DQUOT_FREE_BLOCK(ip, nblocks); dbFree(ip, blkno, nblocks); @@ -468,6 +478,7 @@ static int ea_get(struct inode *inode, s s64 blkno; int rc; int quota_allocation = 0; + int dlimit_allocation = 0; /* When fsck.jfs clears a bad ea, it doesn't clear the size */ if (ji->ea.flag == 0) @@ -543,6 +554,12 @@ static int ea_get(struct inode *inode, s quota_allocation = blocks_needed; + /* Allocate new blocks to dlimit. */ + rc = -ENOSPC; + if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed)) + goto clean_up; + dlimit_allocation = blocks_needed; + rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, &blkno); if (rc) @@ -599,6 +616,9 @@ static int ea_get(struct inode *inode, s return ea_size; clean_up: + /* Rollback dlimit allocation */ + if (dlimit_allocation) + DLIMIT_FREE_BLOCK(inode, dlimit_allocation); /* Rollback quota allocation */ if (quota_allocation) DQUOT_FREE_BLOCK(inode, quota_allocation); @@ -675,8 +695,10 @@ static int ea_put(tid_t tid, struct inod } /* If old blocks exist, they must be removed from quota allocation. */ - if (old_blocks) + if (old_blocks) { + DLIMIT_FREE_BLOCK(inode, old_blocks); DQUOT_FREE_BLOCK(inode, old_blocks); + } inode->i_ctime = CURRENT_TIME; diff -NurpP --minimal linux-2.6.19/fs/libfs.c linux-2.6.19-vs2.1.x-t1/fs/libfs.c --- linux-2.6.19/fs/libfs.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/libfs.c 2006-11-08 04:57:43 +0100 @@ -124,7 +124,8 @@ static inline unsigned char dt_type(stru * both impossible due to the lock on directory. 
*/ -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +static inline int do_dcache_readdir_filter(struct file * filp, + void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; @@ -157,6 +158,8 @@ int dcache_readdir(struct file * filp, v next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; + if (filter && !filter(next)) + continue; spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) @@ -172,6 +175,18 @@ int dcache_readdir(struct file * filp, v return 0; } +int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, NULL); +} + +int dcache_readdir_filter(struct file * filp, void * dirent, filldir_t filldir, + int (*filter)(struct dentry *)) +{ + return do_dcache_readdir_filter(filp, dirent, filldir, filter); +} + + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; @@ -611,6 +626,7 @@ EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(dcache_readdir_filter); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); EXPORT_SYMBOL(simple_commit_write); diff -NurpP --minimal linux-2.6.19/fs/lockd/clntproc.c linux-2.6.19-vs2.1.x-t1/fs/lockd/clntproc.c --- linux-2.6.19/fs/lockd/clntproc.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/lockd/clntproc.c 2006-11-08 04:57:40 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/fs/lockd/mon.c linux-2.6.19-vs2.1.x-t1/fs/lockd/mon.c --- linux-2.6.19/fs/lockd/mon.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/lockd/mon.c 2006-11-08 04:57:40 +0100 @@ -13,6 
+13,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_MONITOR diff -NurpP --minimal linux-2.6.19/fs/locks.c linux-2.6.19-vs2.1.x-t1/fs/locks.c --- linux-2.6.19/fs/locks.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/locks.c 2006-11-30 18:53:18 +0100 @@ -125,6 +125,8 @@ #include #include #include +#include +#include #include #include @@ -147,6 +149,8 @@ static kmem_cache_t *filelock_cache __re /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) { + if (!vx_locks_avail(1)) + return NULL; return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); } @@ -172,6 +176,7 @@ static void locks_free_lock(struct file_ BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!list_empty(&fl->fl_link)); + vx_locks_dec(fl); locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } @@ -191,6 +196,7 @@ void locks_init_lock(struct file_lock *f fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + fl->fl_xid = -1; } EXPORT_SYMBOL(locks_init_lock); @@ -248,6 +254,7 @@ void locks_copy_lock(struct file_lock *n new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; + new->fl_xid = fl->fl_xid; locks_copy_private(new, fl); } @@ -286,6 +293,11 @@ static int flock_make_lock(struct file * fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; + + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + fl->fl_xid = filp->f_xid; + vx_locks_inc(fl); *lock = fl; return 0; @@ -451,6 +463,7 @@ static int lease_init(struct file *filp, fl->fl_owner = current->files; fl->fl_pid = current->tgid; + fl->fl_xid = vx_current_xid(); fl->fl_file = filp; fl->fl_flags = FL_LEASE; @@ -470,6 +483,11 @@ static int lease_alloc(struct file *filp if (fl == NULL) goto out; + fl->fl_xid = vx_current_xid(); + if (filp) + vxd_assert(filp->f_xid == fl->fl_xid, + "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid); + vx_locks_inc(fl); error = 
lease_init(filp, type, fl); if (error) { locks_free_lock(fl); @@ -790,6 +808,7 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + vx_locks_inc(new_fl); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; error = 0; @@ -801,7 +820,8 @@ out: return error; } -static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock) +static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, + struct file_lock *conflock, xid_t xid) { struct file_lock *fl; struct file_lock *new_fl = NULL; @@ -811,6 +831,8 @@ static int __posix_lock_file_conf(struct struct file_lock **before; int error, added = 0; + vxd_assert(xid == vx_current_xid(), + "xid(%d) == current(%d)", xid, vx_current_xid()); /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. @@ -821,7 +843,15 @@ static int __posix_lock_file_conf(struct (request->fl_type != F_UNLCK || request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { new_fl = locks_alloc_lock(); + if (new_fl) { + new_fl->fl_xid = xid; + vx_locks_inc(new_fl); + } new_fl2 = locks_alloc_lock(); + if (new_fl2) { + new_fl2->fl_xid = xid; + vx_locks_inc(new_fl2); + } } lock_kernel(); @@ -1018,7 +1048,8 @@ static int __posix_lock_file_conf(struct */ int posix_lock_file(struct file *filp, struct file_lock *fl) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, NULL); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, NULL, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file); @@ -1033,7 +1064,8 @@ EXPORT_SYMBOL(posix_lock_file); int posix_lock_file_conf(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return __posix_lock_file_conf(filp->f_dentry->d_inode, fl, conflock); + return __posix_lock_file_conf(filp->f_dentry->d_inode, + fl, conflock, filp->f_xid); } EXPORT_SYMBOL(posix_lock_file_conf); @@ -1123,7 +1155,7 @@ int locks_mandatory_area(int read_write, fl.fl_end = offset + count - 1; for 
(;;) { - error = __posix_lock_file_conf(inode, &fl, NULL); + error = __posix_lock_file_conf(inode, &fl, NULL, filp->f_xid); if (error != -EAGAIN) break; if (!(fl.fl_flags & FL_SLEEP)) @@ -1685,6 +1713,11 @@ int fcntl_setlk(unsigned int fd, struct if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. */ @@ -1828,6 +1861,11 @@ int fcntl_setlk64(unsigned int fd, struc if (file_lock == NULL) return -ENOLCK; + vxd_assert(filp->f_xid == vx_current_xid(), + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); + file_lock->fl_xid = filp->f_xid; + vx_locks_inc(file_lock); + /* * This might block, so we do it before checking the inode. */ @@ -2123,6 +2161,10 @@ int get_locks_status(char *buffer, char list_for_each(tmp, &file_lock_list) { struct list_head *btmp; struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + + if (!vx_check(fl->fl_xid, VS_WATCH_P|VS_IDENT)) + continue; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); diff -NurpP --minimal linux-2.6.19/fs/namei.c linux-2.6.19-vs2.1.x-t1/fs/namei.c --- linux-2.6.19/fs/namei.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/namei.c 2006-11-30 19:41:35 +0100 @@ -32,6 +32,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -225,6 +230,31 @@ int generic_permission(struct inode *ino return -EACCES; } +static inline int dx_barrier(struct inode *inode) +{ + if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN)) { + vxwprintk(1, "xid=%d did hit the barrier.", + vx_current_xid()); + return 1; + } + return 0; +} + +static inline int dx_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (dx_barrier(inode)) + return -EACCES; + if (inode->i_tag == 0) + return 0; + if (dx_check(inode->i_tag, 
DX_ADMIN|DX_WATCH|DX_IDENT)) + return 0; + + vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_tag, inode->i_ino, + vxd_cond_path(nd)); + return -EACCES; +} + int permission(struct inode *inode, int mask, struct nameidata *nd) { umode_t mode = inode->i_mode; @@ -235,14 +265,14 @@ int permission(struct inode *inode, int /* * Nobody gets write access to a read-only fs. */ - if (IS_RDONLY(inode) && + if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /* * Nobody gets write access to an immutable file. */ - if (IS_IMMUTABLE(inode)) + if (IS_IMMUTABLE(inode) && !IS_COW(inode)) return -EACCES; } @@ -256,6 +286,8 @@ int permission(struct inode *inode, int /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; + if ((retval = dx_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -431,6 +463,8 @@ static int exec_permission_lite(struct i { umode_t mode = inode->i_mode; + if (dx_barrier(inode)) + return -EACCES; if (inode->i_op && inode->i_op->permission) return -EAGAIN; @@ -736,7 +770,8 @@ static __always_inline void follow_dotdo if (nd->dentry == fs->root && nd->mnt == fs->rootmnt) { read_unlock(&fs->lock); - break; + /* FIXME: for sane '/' avoid follow_mount() */ + return; } read_unlock(&fs->lock); spin_lock(&dcache_lock); @@ -773,16 +808,34 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + struct inode *inode; if (!dentry) goto need_lookup; if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; + inode = dentry->d_inode; + if (!inode) + goto done; + if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *de = PDE(inode); + + if (de && !vx_hide_check(0, de->vx_flags)) + goto hidden; + } + if 
(!dx_check(inode->i_tag, DX_WATCH|DX_ADMIN|DX_HOSTID|DX_IDENT)) + goto hidden; done: path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; +hidden: + vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.", + vx_current_xid(), inode, inode->i_tag, inode->i_ino, + vxd_path(dentry, mnt)); + dput(dentry); + return -ENOENT; need_lookup: dentry = real_lookup(nd->dentry, name, nd); @@ -1384,7 +1437,8 @@ static inline int check_sticky(struct in * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, + int isdir, struct nameidata *nd) { int error; @@ -1394,13 +1448,13 @@ static int may_delete(struct inode *dir, BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim->d_inode, dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = permission(dir,MAY_WRITE | MAY_EXEC, nd); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1531,6 +1585,14 @@ int may_open(struct nameidata *nd, int a if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; +#ifdef CONFIG_VSERVER_COWBL + if (IS_COW(inode) && (flag & FMODE_WRITE)) { + if (IS_COW_LINK(inode)) + return -EMLINK; + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + mark_inode_dirty(inode); + } +#endif error = vfs_permission(nd, acc_mode); if (error) return error; @@ -1547,7 +1609,8 @@ int may_open(struct nameidata *nd, int a return -EACCES; flag &= ~O_TRUNC; - } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) + } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + && (flag & FMODE_WRITE)) return -EROFS; /* * An append-only file must be opened in append mode for 
writing. @@ -1635,6 +1698,11 @@ int open_namei(int dfd, const char *path struct dentry *dir; int count = 0; +#ifdef CONFIG_VSERVER_COWBL + int rflag = flag; + int rmode = mode; +restart: +#endif acc_mode = ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ @@ -1728,6 +1796,22 @@ do_last: goto exit; ok: error = may_open(nd, acc_mode, flag); +#ifdef CONFIG_VSERVER_COWBL + if (error == -EMLINK) { + struct dentry *dentry; + dentry = cow_break_link(pathname); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto exit; + } + dput(dentry); + release_open_intent(nd); + path_release(nd); + flag = rflag; + mode = rmode; + goto restart; + } +#endif if (error) goto exit; return 0; @@ -1839,9 +1923,10 @@ fail: } EXPORT_SYMBOL_GPL(lookup_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int vfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1891,11 +1976,12 @@ asmlinkage long sys_mknodat(int dfd, con error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode, - new_decode_dev(dev)); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + new_decode_dev(dev), &nd); break; case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); + error = vfs_mknod(nd.dentry->d_inode, dentry, mode, + 0, &nd); break; case S_IFDIR: error = -EPERM; @@ -1918,9 +2004,10 @@ asmlinkage long sys_mknod(const char __u return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int vfs_mkdir(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1962,7 +2049,7 @@ 
asmlinkage long sys_mkdirat(int dfd, con if (!IS_POSIXACL(nd.dentry->d_inode)) mode &= ~current->fs->umask; - error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); + error = vfs_mkdir(nd.dentry->d_inode, dentry, mode, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2006,9 +2093,10 @@ void dentry_unhash(struct dentry *dentry spin_unlock(&dcache_lock); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(dir, dentry, 1, nd); if (error) return error; @@ -2070,7 +2158,7 @@ static long do_rmdir(int dfd, const char error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; - error = vfs_rmdir(nd.dentry->d_inode, dentry); + error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd); dput(dentry); exit2: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2086,9 +2174,10 @@ asmlinkage long sys_rmdir(const char __u return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +int vfs_unlink(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - int error = may_delete(dir, dentry, 0); + int error = may_delete(dir, dentry, 0, nd); if (error) return error; @@ -2150,7 +2239,7 @@ static long do_unlinkat(int dfd, const c inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); - error = vfs_unlink(nd.dentry->d_inode, dentry); + error = vfs_unlink(nd.dentry->d_inode, dentry, &nd); exit2: dput(dentry); } @@ -2185,9 +2274,10 @@ asmlinkage long sys_unlink(const char __ return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -2231,7 +2321,7 @@ asmlinkage 
long sys_symlinkat(const char if (IS_ERR(dentry)) goto out_unlock; - error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO, &nd); dput(dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2248,7 +2338,8 @@ asmlinkage long sys_symlink(const char _ return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +int vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, struct nameidata *nd) { struct inode *inode = old_dentry->d_inode; int error; @@ -2256,7 +2347,7 @@ int vfs_link(struct dentry *old_dentry, if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry, nd); if (error) return error; @@ -2266,7 +2357,7 @@ int vfs_link(struct dentry *old_dentry, /* * A link to an append-only or immutable file cannot be created. */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; @@ -2326,7 +2417,7 @@ asmlinkage long sys_linkat(int olddfd, c error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_unlock; - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry, &nd); dput(new_dentry); out_unlock: mutex_unlock(&nd.dentry->d_inode->i_mutex); @@ -2458,14 +2549,14 @@ int vfs_rename(struct inode *old_dir, st if (old_dentry->d_inode == new_dentry->d_inode) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(old_dir, old_dentry, is_dir, NULL); if (error) return error; if (!new_dentry->d_inode) error = may_create(new_dir, new_dentry, NULL); else - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(new_dir, new_dentry, is_dir, NULL); if (error) return error; @@ -2543,6 +2634,9 @@ static int 
do_rename(int olddfd, const c
 	error = -EINVAL;
 	if (old_dentry == trap)
 		goto exit4;
+	error = -EROFS;
+	if (MNT_IS_RDONLY(newnd.mnt))
+		goto exit4;
 	new_dentry = lookup_hash(&newnd);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
@@ -2636,6 +2730,176 @@ int vfs_follow_link(struct nameidata *nd
 	return __vfs_follow_link(nd, link);
 }
 
+
+#ifdef CONFIG_VSERVER_COWBL
+
+/*
+ * NOTE(review): the header name on the #include below was lost in
+ * extraction; <linux/file.h> declares fput() -- confirm upstream.
+ */
+#include <linux/file.h>
+
+/*
+ * cow_break_link - replace a file by a private copy of itself
+ * @pathname: path of the file to break (symlinks are followed)
+ *
+ * Creates a temporary file next to the original (its path plus one pad
+ * character, counting down from '\251'), copies the data over with
+ * vfs_sendfile() and renames the copy onto the original.
+ *
+ * Returns the new dentry with a reference held for the caller, or an
+ * ERR_PTR(): -ENOMEM, the lookup/create/open error, or -EMLINK when
+ * the pad characters ran out or the copy/rename step failed.
+ *
+ * NOTE(review): a failed copy or rename leaves the temporary file
+ * behind; nothing here unlinks it.
+ */
+struct dentry *cow_break_link(const char *pathname)
+{
+	int ret, mode, pathlen;
+	struct nameidata old_nd, dir_nd;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *res = ERR_PTR(-EMLINK);
+	struct vfsmount *old_mnt, *new_mnt;
+	struct file *old_file;
+	struct file *new_file;
+	char *to, *path, pad='\251';
+	loff_t size;
+
+	vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
+	path = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
+	vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
+	if (ret < 0) {
+		/* old_nd is not valid, so there is nothing to release */
+		res = ERR_PTR(ret);
+		goto out_free_path;
+	}
+	old_dentry = old_nd.dentry;
+	old_mnt = old_nd.mnt;
+	mode = old_dentry->d_inode->i_mode;
+
+	/* PATH_MAX-2 keeps room for the pad character and its NUL */
+	to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
+	if (IS_ERR(to)) {
+		res = ERR_PTR(PTR_ERR(to));
+		goto out_rel_old;
+	}
+	pathlen = strlen(to);
+	vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to);
+
+	to[pathlen+1] = 0;
+retry:
+	to[pathlen] = pad--;
+	if (pad <= '\240')
+		goto out_rel_old;
+
+	vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
+	ret = path_lookup(to,
+		LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd);
+	if (ret < 0) {
+		/* the parent is the same for every pad, so give up */
+		res = ERR_PTR(ret);
+		goto out_rel_old;
+	}
+
+	/* this puppy downs the inode sem, and keeps it down on failure */
+	new_dentry = lookup_create(&dir_nd, 0);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"lookup_create(new): %p", new_dentry);
+	if (!new_dentry || IS_ERR(new_dentry)) {
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		path_release(&dir_nd);
+		goto retry;
+	}
+
+	ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"vfs_create(new): %d", ret);
+	if (ret == -EEXIST) {
+
+		mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+		dput(new_dentry);
+		path_release(&dir_nd);
+		goto retry;
+	}
+	if (ret < 0) {
+		/* nothing was created, so there is no inode to open */
+		res = ERR_PTR(ret);
+		goto out_rel_both;
+	}
+
+	new_mnt = dir_nd.mnt;
+
+	dget(old_dentry);
+	mntget(old_mnt);
+	/* this one cleans up the dentry in case of failure */
+	old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(old): %p", old_file);
+	if (!old_file || IS_ERR(old_file)) {
+		/* failure comes back as an ERR_PTR(), never fput() it */
+		if (old_file)
+			res = ERR_PTR(PTR_ERR(old_file));
+		goto out_rel_both;
+	}
+
+	dget(new_dentry);
+	mntget(new_mnt);
+	/* this one cleans up the dentry in case of failure */
+	new_file = dentry_open(new_dentry, new_mnt, O_WRONLY);
+	vxdprintk(VXD_CBIT(misc, 2),
+		"dentry_open(new): %p", new_file);
+	if (!new_file || IS_ERR(new_file)) {
+		if (new_file)
+			res = ERR_PTR(PTR_ERR(new_file));
+		goto out_fput_old;
+	}
+
+	size = i_size_read(old_file->f_dentry->d_inode);
+	ret = vfs_sendfile(new_file, old_file, NULL, size, 0);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", ret);
+
+	if (ret < 0)
+		goto out_fput_both;
+
+	ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
+		old_nd.dentry->d_parent->d_inode, old_dentry);
+	vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
+	if (!ret) {
+		res = new_dentry;
+		dget(new_dentry);
+	}
+
+out_fput_both:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(new_file=%p[#%d])", new_file,
+		atomic_read(&new_file->f_count));
+	fput(new_file);
+
+out_fput_old:
+	vxdprintk(VXD_CBIT(misc, 3),
+		"fput(old_file=%p[#%d])", old_file,
+		atomic_read(&old_file->f_count));
+	fput(old_file);
+
+out_rel_both:
+	mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
+	dput(new_dentry);
+
+	path_release(&dir_nd);
+out_rel_old:
+	path_release(&old_nd);
+out_free_path:
+	kfree(path);
+	return res;
+}
+
+#endif
+
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {
diff -NurpP --minimal linux-2.6.19/fs/namespace.c linux-2.6.19-vs2.1.x-t1/fs/namespace.c
--- linux-2.6.19/fs/namespace.c	2006-11-30 21:19:26 +0100
+++ linux-2.6.19-vs2.1.x-t1/fs/namespace.c	2006-11-30 18:53:18 +0100
@@ -25,6 +25,10 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
 #include "pnode.h"
@@ -241,6 +245,7 @@ static struct vfsmount *clone_mnt(struct
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
+		mnt->mnt_tag = old->mnt_tag;
 
 		if (flag & CL_SLAVE) {
list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -349,43 +354,85 @@ static inline void mangle(struct seq_fil seq_escape(m, s, " \t\n\\"); } +static int mnt_is_reachable(struct vfsmount *mnt) +{ + struct vfsmount *root_mnt; + struct dentry *root, *point; + int ret; + + if (mnt == mnt->mnt_namespace->root) + return 1; + + spin_lock(&dcache_lock); + root_mnt = current->fs->rootmnt; + root = current->fs->root; + point = root; + + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { + point = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } + + ret = (mnt == root_mnt) && is_subdir(point, root); + + spin_unlock(&dcache_lock); + + return ret; +} + static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; int err = 0; static struct proc_fs_info { - int flag; - char *str; + int s_flag; + int mnt_flag; + char *set_str; + char *unset_str; } fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_DIRSYNC, ",dirsync" }, - { MS_MANDLOCK, ",mand" }, - { 0, NULL } - }; - static struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { MNT_NOATIME, ",noatime" }, - { MNT_NODIRATIME, ",nodiratime" }, - { 0, NULL } + { MS_RDONLY, MNT_RDONLY, "ro", "rw" }, + { MS_SYNCHRONOUS, 0, ",sync", NULL }, + { MS_DIRSYNC, 0, ",dirsync", NULL }, + { MS_MANDLOCK, 0, ",mand", NULL }, + { MS_TAGGED, 0, ",tag", NULL }, + { MS_NOATIME, MNT_NOATIME, ",noatime", NULL }, + { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL }, + { 0, MNT_NOSUID, ",nosuid", NULL }, + { 0, MNT_NODEV, ",nodev", NULL }, + { 0, MNT_NOEXEC, ",noexec", NULL }, + { 0, 0, NULL, NULL } }; - struct proc_fs_info *fs_infop; + struct proc_fs_info *p; + unsigned long s_flags = mnt->mnt_sb->s_flags; + int mnt_flags = mnt->mnt_flags; - mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); - seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); - mangle(m, mnt->mnt_sb->s_type->name); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_sb->s_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return 0; + + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "/dev/root / "); + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); + mangle(m, mnt->mnt_sb->s_type->name); + seq_putc(m, ' '); + for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) { + if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) { + if (p->set_str) + seq_puts(m, p->set_str); + } else { + if (p->unset_str) + seq_puts(m, p->unset_str); + } } + if (mnt->mnt_flags & MNT_TAGID) + seq_printf(m, ",tag=%d", mnt->mnt_tag); if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); @@ -404,17 +451,27 @@ static int show_vfsstat(struct seq_file struct vfsmount *mnt = v; int err = 0; - /* device */ - if (mnt->mnt_devname) { - seq_puts(m, "device "); - mangle(m, mnt->mnt_devname); - } else - seq_puts(m, "no device"); + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) + return 0; - /* mount point */ - seq_puts(m, " mounted on "); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); - seq_putc(m, ' '); + if (!vx_check(0, VS_ADMIN|VS_WATCH) && + mnt == current->fs->rootmnt) { + seq_puts(m, "device /dev/root mounted on / "); + } else { + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); 
+			mangle(m, mnt->mnt_devname);
+		} else
+			seq_puts(m, "no device");
+
+		/* mount point */
+		seq_puts(m, " mounted on ");
+		seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+		seq_putc(m, ' ');
+	}
 
 	/* file system type */
 	seq_puts(m, "with fstype ");
@@ -595,7 +652,7 @@ static int do_umount(struct vfsmount *mn
 		down_write(&sb->s_umount);
 		if (!(sb->s_flags & MS_RDONLY)) {
 			lock_kernel();
-			DQUOT_OFF(sb);
+			DQUOT_OFF(sb->s_dqh);
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
 			unlock_kernel();
 		}
@@ -644,7 +701,7 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
 		goto dput_and_out;
 
 	retval = do_umount(nd.mnt, flags);
@@ -668,7 +725,7 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -897,11 +954,13 @@ static int do_change_type(struct nameida
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, tag_t tag,
+	unsigned long flags, int mnt_flags)
 {
 	struct nameidata old_nd;
 	struct vfsmount *mnt = NULL;
 	int err = mount_is_safe(nd);
+	int recurse = flags & MS_REC;
 	if (err)
 		return err;
 	if (!old_name || !*old_name)
@@ -927,6 +986,12 @@ static int do_loopback(struct nameidata
 	if (!mnt)
 		goto out;
 
+	mnt->mnt_flags = mnt_flags;
+	if (flags & MS_TAGID) {
+		mnt->mnt_tag = tag;
+		mnt->mnt_flags |= MNT_TAGID;
+	}
+
 	err = graft_tree(mnt, nd);
 	if (err) {
 		LIST_HEAD(umount_list);
@@ -935,6 +1000,7 @@ static int do_loopback(struct nameidata
 		spin_unlock(&vfsmount_lock);
 		release_mounts(&umount_list);
-	}
+	} else		/* on error mnt was handed to release_mounts() */
+		mnt->mnt_flags = mnt_flags;
 
 out:
 	up_write(&namespace_sem);
@@ -948,12 +1014,12 @@ out:
  * on it - tough luck.
*/ static int do_remount(struct nameidata *nd, int flags, int mnt_flags, - void *data) + void *data, xid_t xid) { int err; struct super_block *sb = nd->mnt->mnt_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT)) return -EPERM; if (!check_mnt(nd->mnt)) @@ -987,7 +1053,7 @@ static int do_move_mount(struct nameidat struct nameidata old_nd, parent_nd; struct vfsmount *p; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1067,7 +1133,7 @@ static int do_new_mount(struct nameidata return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); @@ -1379,6 +1445,7 @@ long do_mount(char *dev_name, char *dir_ struct nameidata nd; int retval = 0; int mnt_flags = 0; + tag_t tag = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -1394,7 +1461,19 @@ long do_mount(char *dev_name, char *dir_ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; +#ifdef CONFIG_PROPAGATE + retval = dx_parse_tag(data_page, &tag, 1); + if (retval) { + mnt_flags |= MNT_TAGID; + /* bind and re-mounts get the tag flag */ + if (flags & (MS_BIND|MS_REMOUNT)) + flags |= MS_TAGID; + } +#endif + /* Separate the per-mountpoint flags */ + if (flags & MS_RDONLY) + mnt_flags |= MNT_RDONLY; if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) @@ -1406,6 +1485,8 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; + if (!capable(CAP_SYS_ADMIN)) + mnt_flags |= MNT_NODEV; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME | MS_NODIRATIME); @@ -1420,9 +1501,9 @@ long do_mount(char *dev_name, char *dir_ if (flags & MS_REMOUNT) retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); + data_page, tag); else if (flags & MS_BIND) - retval = 
do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&nd, dev_name, tag, flags, mnt_flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) @@ -1520,7 +1601,7 @@ int copy_namespace(int flags, struct tas if (!(flags & CLONE_NEWNS)) return 0; - if (!capable(CAP_SYS_ADMIN)) { + if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) { err = -EPERM; goto out; } diff -NurpP --minimal linux-2.6.19/fs/nfs/client.c linux-2.6.19-vs2.1.x-t1/fs/nfs/client.c --- linux-2.6.19/fs/nfs/client.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfs/client.c 2006-11-08 04:57:47 +0100 @@ -520,6 +520,9 @@ static int nfs_init_server_rpcclient(str if (server->flags & NFS4_MOUNT_INTR) server->client->cl_intr = 1; + server->client->cl_tag = 0; + if (server->flags & NFS_MOUNT_TAGGED) + server->client->cl_tag = 1; return 0; } @@ -676,6 +679,10 @@ static void nfs_server_set_fsinfo(struct server->acdirmin = server->acdirmax = 0; } + /* FIXME: needs fsinfo + if (server->flags & NFS_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; */ + server->maxfilesize = fsinfo->maxfilesize; /* We're airborne Set socket buffersize */ diff -NurpP --minimal linux-2.6.19/fs/nfs/dir.c linux-2.6.19-vs2.1.x-t1/fs/nfs/dir.c --- linux-2.6.19/fs/nfs/dir.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfs/dir.c 2006-11-30 19:09:28 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -933,6 +934,7 @@ static struct dentry *nfs_lookup(struct if (IS_ERR(res)) goto out_unlock; + dx_propagate_tag(nd, inode); no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { @@ -975,7 +977,8 @@ static int is_atomic_open(struct inode * if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? 
*/ - if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) && + (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) return 0; return 1; } diff -NurpP --minimal linux-2.6.19/fs/nfs/inode.c linux-2.6.19-vs2.1.x-t1/fs/nfs/inode.c --- linux-2.6.19/fs/nfs/inode.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfs/inode.c 2006-11-30 18:53:18 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -279,8 +280,10 @@ nfs_fhget(struct super_block *sb, struct nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* * report the blocks in 512byte units @@ -369,6 +372,8 @@ void nfs_setattr_update_inode(struct ino inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); @@ -778,6 +783,9 @@ static int nfs_check_inode_attributes(st struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; int data_unstable; + uid_t uid; + gid_t gid; + tag_t tag; /* Has the inode gone and changed behind our back? 
*/ @@ -805,10 +813,15 @@ static int nfs_check_inode_attributes(st if (cur_size != new_isize && nfsi->npages == 0) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + || inode->i_uid != uid + || inode->i_gid != gid + || inode->i_tag != tag) nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ @@ -898,6 +911,9 @@ static int nfs_update_inode(struct inode loff_t cur_isize, new_isize; unsigned int invalid = 0; int data_stable; + uid_t uid; + gid_t gid; + tag_t tag; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -970,15 +986,21 @@ static int nfs_update_inode(struct inode } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); + gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); + tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) + inode->i_uid != uid || + inode->i_gid != gid || + inode->i_tag != tag) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_tag = tag; if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* diff -NurpP --minimal linux-2.6.19/fs/nfs/nfs3xdr.c linux-2.6.19-vs2.1.x-t1/fs/nfs/nfs3xdr.c --- linux-2.6.19/fs/nfs/nfs3xdr.c 2006-11-30 21:19:26 +0100 +++ 
linux-2.6.19-vs2.1.x-t1/fs/nfs/nfs3xdr.c 2006-11-30 18:53:18 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -178,7 +179,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_f } static inline __be32 * -xdr_encode_sattr(__be32 *p, struct iattr *attr) +xdr_encode_sattr(__be32 *p, struct iattr *attr, int tag) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -186,15 +187,17 @@ xdr_encode_sattr(__be32 *p, struct iattr } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID) { + if (attr->ia_valid & ATTR_UID || + (tag && (attr->ia_valid & ATTR_TAG))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); + *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID) { + if (attr->ia_valid & ATTR_GID || + (tag && (attr->ia_valid & ATTR_TAG))) { *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); + *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag)); } else { *p++ = xdr_zero; } @@ -279,7 +282,8 @@ static int nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -370,7 +374,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -384,7 +389,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -397,7 +403,8 
@@ nfs3_xdr_symlinkargs(struct rpc_rqst *re { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); *p++ = htonl(args->pathlen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -415,7 +422,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tag); if (args->type == NF3CHR || args->type == NF3BLK) { *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); diff -NurpP --minimal linux-2.6.19/fs/nfs/nfsroot.c linux-2.6.19-vs2.1.x-t1/fs/nfs/nfsroot.c --- linux-2.6.19/fs/nfs/nfsroot.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfs/nfsroot.c 2006-11-08 04:57:47 +0100 @@ -86,6 +86,7 @@ #include #include #include +#include /* Define this to allow debugging output */ #undef NFSROOT_DEBUG @@ -118,12 +119,12 @@ static int mount_port __initdata = 0; / enum { /* Options that take integer arguments */ Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, - Opt_acregmax, Opt_acdirmin, Opt_acdirmax, + Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid, /* Options that take no arguments */ Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_acl, Opt_noacl, + Opt_acl, Opt_noacl, Opt_tag, Opt_notag, /* Error token */ Opt_err }; @@ -160,6 +161,10 @@ static match_table_t __initdata tokens = {Opt_tcp, "tcp"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tag, "tag"}, + {Opt_notag, "notag"}, + {Opt_tagid, "tagid=%u"}, + {Opt_tag, "tagxid"}, {Opt_err, NULL} }; @@ -274,6 +279,20 @@ static int __init root_nfs_parse(char *n case Opt_noacl: nfs_data.flags |= 
NFS_MOUNT_NOACL; break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + nfs_data.flags |= NFS_MOUNT_TAGGED; + break; + case Opt_notag: + nfs_data.flags &= ~NFS_MOUNT_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + nfs_data.flags |= NFS_MOUNT_TAGGED; + break; +#endif default: printk(KERN_WARNING "Root-NFS: unknown " "option: %s\n", p); diff -NurpP --minimal linux-2.6.19/fs/nfs/super.c linux-2.6.19-vs2.1.x-t1/fs/nfs/super.c --- linux-2.6.19/fs/nfs/super.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfs/super.c 2006-11-30 18:53:18 +0100 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -290,6 +291,7 @@ static void nfs_show_mount_options(struc { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_TAGGED, ",tag", "" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; diff -NurpP --minimal linux-2.6.19/fs/nfsd/auth.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/auth.c --- linux-2.6.19/fs/nfsd/auth.c 2006-06-18 04:54:42 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/auth.c 2006-11-30 18:53:18 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) @@ -41,19 +42,22 @@ int nfsd_setuser(struct svc_rqst *rqstp, get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + current->fsuid = INOTAG_UID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + current->fsgid = INOTAG_GID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid); else current->fsgid = exp->ex_anon_gid; + /* this desperately needs a tag :) */ + current->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0); + if (!cred.cr_group_info) return -ENOMEM; ret = set_current_groups(cred.cr_group_info); put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { + if 
(INOTAG_UID(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { cap_t(current->cap_effective) |= (CAP_NFSD_MASK & diff -NurpP --minimal linux-2.6.19/fs/nfsd/nfs3xdr.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs3xdr.c --- linux-2.6.19/fs/nfsd/nfs3xdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs3xdr.c 2006-11-30 18:53:18 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -111,6 +112,8 @@ static inline __be32 * decode_sattr3(__be32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -120,12 +123,15 @@ decode_sattr3(__be32 *p, struct iattr *i } if (*p++) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); + iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); + iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); if (*p++) { u64 newsize; @@ -163,8 +169,10 @@ encode_fattr3(struct svc_rqst *rqstp, __ *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag))); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { diff -NurpP --minimal linux-2.6.19/fs/nfsd/nfs4recover.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs4recover.c --- linux-2.6.19/fs/nfsd/nfs4recover.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs4recover.c 2006-11-08 21:53:01 +0100 @@ -156,7 +156,7 @@ nfsd4_create_clid_dir(struct nfs4_client dprintk("NFSD: 
nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU, NULL); out_put: dput(dentry); out_unlock: @@ -260,7 +260,7 @@ nfsd4_remove_clid_file(struct dentry *di return -EINVAL; } mutex_lock(&dir->d_inode->i_mutex); - status = vfs_unlink(dir->d_inode, dentry); + status = vfs_unlink(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -275,7 +275,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_rmdir(dir->d_inode, dentry); + status = vfs_rmdir(dir->d_inode, dentry, NULL); mutex_unlock(&dir->d_inode->i_mutex); return status; } diff -NurpP --minimal linux-2.6.19/fs/nfsd/nfs4xdr.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs4xdr.c --- linux-2.6.19/fs/nfsd/nfs4xdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfs4xdr.c 2006-11-30 18:53:18 +0100 @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -1727,14 +1728,18 @@ out_acl: WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), + stat.uid, stat.tag), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), + stat.gid, stat.tag), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) diff -NurpP --minimal linux-2.6.19/fs/nfsd/nfsxdr.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfsxdr.c --- linux-2.6.19/fs/nfsd/nfsxdr.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/nfsxdr.c 2006-11-30 
18:53:18 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -102,6 +103,8 @@ static inline __be32 * decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -115,12 +118,15 @@ decode_sattr(__be32 *p, struct iattr *ia } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); + iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); + iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -164,8 +170,10 @@ encode_fattr(struct svc_rqst *rqstp, __b *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) nfsd_ruid(rqstp, + TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); + *p++ = htonl((u32) nfsd_rgid(rqstp, + TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag))); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); diff -NurpP --minimal linux-2.6.19/fs/nfsd/vfs.c linux-2.6.19-vs2.1.x-t1/fs/nfsd/vfs.c --- linux-2.6.19/fs/nfsd/vfs.c 2006-11-30 21:19:26 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/nfsd/vfs.c 2006-11-20 21:12:32 +0100 @@ -1183,13 +1183,13 @@ nfsd_create(struct svc_rqst *rqstp, stru host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL); break; default: 
printk("nfsd: bad file type %o in nfsd_create\n", type); @@ -1474,11 +1474,13 @@ nfsd_symlink(struct svc_rqst *rqstp, str else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, + path_alloced, mode, NULL); kfree(path_alloced); } } else - host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, + path, mode, NULL); if (!host_err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1537,7 +1539,7 @@ nfsd_link(struct svc_rqst *rqstp, struct dold = tfhp->fh_dentry; dest = dold->d_inode; - host_err = vfs_link(dold, dirp, dnew); + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { if (EX_ISSYNC(ffhp->fh_export)) { err = nfserrno(nfsd_sync_dir(ddir)); @@ -1702,9 +1704,9 @@ nfsd_unlink(struct svc_rqst *rqstp, stru host_err = -EPERM; } else #endif - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); } else { /* It's RMDIR */ - host_err = vfs_rmdir(dirp, rdentry); + host_err = vfs_rmdir(dirp, rdentry, NULL); } dput(rdentry); @@ -1815,7 +1817,8 @@ nfsd_permission(struct svc_export *exp, */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp) || IS_RDONLY(inode)) + if (EX_RDONLY(exp) || IS_RDONLY(inode) + || MNT_IS_RDONLY(exp->ex_mnt)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; diff -NurpP --minimal linux-2.6.19/fs/ocfs2/dlm/dlmfs.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/dlm/dlmfs.c --- linux-2.6.19/fs/ocfs2/dlm/dlmfs.c 2006-11-30 21:19:27 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/dlm/dlmfs.c 2006-11-08 04:57:40 +0100 @@ -44,6 +44,7 @@ #include #include #include +#include #include diff -NurpP --minimal linux-2.6.19/fs/ocfs2/dlmglue.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/dlmglue.c --- linux-2.6.19/fs/ocfs2/dlmglue.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/dlmglue.c 
2006-11-08 04:57:52 +0100 @@ -1377,8 +1377,12 @@ static void ocfs2_refresh_inode_from_lvb inode->i_blocks = ocfs2_align_bytes_to_sectors(i_size_read(inode)); + oi->ip_flags &= ~OCFS2_FL_MASK; + oi->ip_flags |= be32_to_cpu(lvb->lvb_iflags) & OCFS2_FL_MASK; + inode->i_uid = be32_to_cpu(lvb->lvb_iuid); inode->i_gid = be32_to_cpu(lvb->lvb_igid); + inode->i_tag = be16_to_cpu(lvb->lvb_itag); inode->i_mode = be16_to_cpu(lvb->lvb_imode); inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); ocfs2_unpack_timespec(&inode->i_atime, diff -NurpP --minimal linux-2.6.19/fs/ocfs2/file.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/file.c --- linux-2.6.19/fs/ocfs2/file.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/file.c 2006-11-08 04:57:52 +0100 @@ -800,13 +800,15 @@ int ocfs2_setattr(struct dentry *dentry, mlog(0, "uid change: %d\n", attr->ia_uid); if (attr->ia_valid & ATTR_GID) mlog(0, "gid change: %d\n", attr->ia_gid); + if (attr->ia_valid & ATTR_TAG) + mlog(0, "tag change: %d\n", attr->ia_tag); if (attr->ia_valid & ATTR_SIZE) mlog(0, "size change...\n"); if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) mlog(0, "time change...\n"); #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ - | ATTR_GID | ATTR_UID | ATTR_MODE) + | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE) if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); return 0; @@ -1220,6 +1222,7 @@ bail: struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .sync_flags = ocfs2_sync_flags, }; struct inode_operations ocfs2_special_file_iops = { diff -NurpP --minimal linux-2.6.19/fs/ocfs2/inode.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/inode.c --- linux-2.6.19/fs/ocfs2/inode.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/inode.c 2006-11-08 04:57:52 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -43,6 +44,7 @@ #include "file.h" #include "heartbeat.h" #include 
"inode.h" +#include "ioctl.h" #include "journal.h" #include "namei.h" #include "suballoc.h" @@ -236,6 +238,8 @@ int ocfs2_populate_inode(struct inode *i struct super_block *sb; struct ocfs2_super *osb; int status = -EINVAL; + uid_t uid; + gid_t gid; mlog_entry("(0x%p, size:%llu)\n", inode, (unsigned long long)fe->i_size); @@ -267,8 +271,12 @@ int ocfs2_populate_inode(struct inode *i inode->i_generation = le32_to_cpu(fe->i_generation); inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); inode->i_mode = le16_to_cpu(fe->i_mode); - inode->i_uid = le32_to_cpu(fe->i_uid); - inode->i_gid = le32_to_cpu(fe->i_gid); + uid = le32_to_cpu(fe->i_uid); + gid = le32_to_cpu(fe->i_gid); + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, + /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0); /* Fast symlinks will have i_size but no allocated clusters. */ if (S_ISLNK(inode->i_mode) && !fe->i_clusters) @@ -1223,13 +1231,18 @@ int ocfs2_mark_inode_dirty(struct ocfs2_ spin_lock(&OCFS2_I(inode)->ip_lock); fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); + /* fe->i_flags &= cpu_to_le32(~OCFS2_FL_MASK); + fe->i_flags |= cpu_to_le32(OCFS2_I(inode)->ip_flags & OCFS2_FL_MASK); */ fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); spin_unlock(&OCFS2_I(inode)->ip_lock); fe->i_size = cpu_to_le64(i_size_read(inode)); fe->i_links_count = cpu_to_le16(inode->i_nlink); - fe->i_uid = cpu_to_le32(inode->i_uid); - fe->i_gid = cpu_to_le32(inode->i_gid); + fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), + inode->i_uid, inode->i_tag)); + fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), + inode->i_gid, inode->i_tag)); + /* i_tag = = cpu_to_le16(inode->i_tag); */ fe->i_mode = cpu_to_le16(inode->i_mode); fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); @@ -1261,11 +1274,17 @@ void ocfs2_refresh_inode(struct inode *i 
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); + /* OCFS2_I(inode)->ip_flags &= ~OCFS2_FL_MASK; + OCFS2_I(inode)->ip_flags |= le32_to_cpu(fe->i_flags) & OCFS2_FL_MASK; */ ocfs2_set_inode_flags(inode); i_size_write(inode, le64_to_cpu(fe->i_size)); inode->i_nlink = le16_to_cpu(fe->i_links_count); - inode->i_uid = le32_to_cpu(fe->i_uid); - inode->i_gid = le32_to_cpu(fe->i_gid); + inode->i_uid = INOTAG_UID(DX_TAG(inode), + le32_to_cpu(fe->i_uid), le32_to_cpu(fe->i_gid)); + inode->i_gid = INOTAG_GID(DX_TAG(inode), + le32_to_cpu(fe->i_uid), le32_to_cpu(fe->i_gid)); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), le32_to_cpu(fe->i_uid), + le32_to_cpu(fe->i_gid), /* le16_to_cpu(raw_inode->i_raw_tag) */ 0); inode->i_mode = le16_to_cpu(fe->i_mode); if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) inode->i_blocks = 0; diff -NurpP --minimal linux-2.6.19/fs/ocfs2/inode.h linux-2.6.19-vs2.1.x-t1/fs/ocfs2/inode.h --- linux-2.6.19/fs/ocfs2/inode.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/inode.h 2006-11-08 04:57:46 +0100 @@ -150,5 +150,6 @@ int ocfs2_aio_read(struct file *file, st int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); void ocfs2_set_inode_flags(struct inode *inode); +int ocfs2_sync_flags(struct inode *inode); #endif /* OCFS2_INODE_H */ diff -NurpP --minimal linux-2.6.19/fs/ocfs2/namei.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/namei.c --- linux-2.6.19/fs/ocfs2/namei.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/namei.c 2006-11-08 04:57:52 +0100 @@ -40,6 +40,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -497,6 +498,9 @@ static int ocfs2_mknod_locked(struct ocf u64 fe_blkno = 0; u16 suballoc_bit; struct inode *inode = NULL; + uid_t uid; + gid_t gid; + tag_t tag; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -556,13 +560,19 @@ static int ocfs2_mknod_locked(struct ocf fe->i_blkno =
cpu_to_le64(fe_blkno); fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); - fe->i_uid = cpu_to_le32(current->fsuid); + + tag = dx_current_fstag(osb->sb); + uid = current->fsuid; if (dir->i_mode & S_ISGID) { - fe->i_gid = cpu_to_le32(dir->i_gid); + gid = dir->i_gid; if (S_ISDIR(mode)) mode |= S_ISGID; } else - fe->i_gid = cpu_to_le32(current->fsgid); + gid = current->fsgid; + + fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), uid, tag)); + fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), gid, tag)); + inode->i_tag = tag; fe->i_mode = cpu_to_le16(mode); if (S_ISCHR(mode) || S_ISBLK(mode)) fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); @@ -2300,4 +2310,5 @@ struct inode_operations ocfs2_dir_iops = .rename = ocfs2_rename, .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .sync_flags = ocfs2_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/ocfs2/ocfs2.h linux-2.6.19-vs2.1.x-t1/fs/ocfs2/ocfs2.h --- linux-2.6.19/fs/ocfs2/ocfs2.h 2006-09-20 16:58:35 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/ocfs2.h 2006-11-08 04:57:52 +0100 @@ -174,6 +174,7 @@ enum ocfs2_mount_options OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ + OCFS2_MOUNT_TAGGED = 1 << 8, /* use tagging */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff -NurpP --minimal linux-2.6.19/fs/ocfs2/super.c linux-2.6.19-vs2.1.x-t1/fs/ocfs2/super.c --- linux-2.6.19/fs/ocfs2/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ocfs2/super.c 2006-11-08 04:57:52 +0100 @@ -141,6 +141,7 @@ enum { Opt_hb_local, Opt_data_ordered, Opt_data_writeback, + Opt_tag, Opt_notag, Opt_tagid, Opt_err, }; @@ -154,6 +155,10 @@ static match_table_t tokens = { {Opt_hb_local, OCFS2_HB_LOCAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_tag, "tag"}, + {Opt_tag, "tagxid"}, + {Opt_notag, "notag"}, + {Opt_tagid, 
"tagid=%u"}, {Opt_err, NULL} }; @@ -362,6 +367,14 @@ static int ocfs2_remount(struct super_bl goto out; } + printk("ocfs2_remount: %lx,%lx\n", osb->s_mount_opt, sb->s_flags); + if ((parsed_options & OCFS2_MOUNT_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + ret = -EINVAL; + mlog(ML_ERROR, "Cannot change tagging on remount\n"); + goto out; + } + if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != (parsed_options & OCFS2_MOUNT_HB_LOCAL)) { ret = -EINVAL; @@ -635,6 +648,9 @@ static int ocfs2_fill_super(struct super ocfs2_complete_mount_recovery(osb); + if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED) + sb->s_flags |= MS_TAGGED; + printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " "with %s data mode.\n", osb->dev_str, osb->node_num, osb->slot_num, @@ -747,6 +763,20 @@ static int ocfs2_parse_options(struct su case Opt_data_writeback: *mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; break; +#ifndef CONFIG_TAGGING_NONE + case Opt_tag: + *mount_opt |= OCFS2_MOUNT_TAGGED; + break; + case Opt_notag: + *mount_opt &= ~OCFS2_MOUNT_TAGGED; + break; +#endif +#ifdef CONFIG_PROPAGATE + case Opt_tagid: + /* use args[0] */ + *mount_opt |= OCFS2_MOUNT_TAGGED; + break; +#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " diff -NurpP --minimal linux-2.6.19/fs/open.c linux-2.6.19-vs2.1.x-t1/fs/open.c --- linux-2.6.19/fs/open.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/open.c 2006-11-30 18:53:18 +0100 @@ -27,22 +27,31 @@ #include #include #include +#include +#include +#include +#include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { int retval = -ENODEV; if (dentry) { + struct super_block *sb = dentry->d_sb; + retval = -ENOSYS; - if (dentry->d_sb->s_op->statfs) { + if (sb->s_op->statfs) { memset(buf, 0, sizeof(*buf)); retval = security_sb_statfs(dentry); if (retval) return retval; - retval = dentry->d_sb->s_op->statfs(dentry, buf); + retval = sb->s_op->statfs(dentry, buf); if (retval == 0 && buf->f_frsize == 0) buf->f_frsize = 
buf->f_bsize; } + if (!vx_check(0, VS_ADMIN|VS_WATCH)) + vx_vsi_statfs(sb, buf); } return retval; } @@ -246,7 +255,7 @@ static long do_sys_truncate(const char _ goto dput_and_out; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; error = -EPERM; @@ -395,7 +404,7 @@ asmlinkage long sys_faccessat(int dfd, c special_file(nd.dentry->d_inode->i_mode)) goto out_path_release; - if(IS_RDONLY(nd.dentry->d_inode)) + if(IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt)) res = -EROFS; out_path_release: @@ -509,7 +518,7 @@ asmlinkage long sys_fchmod(unsigned int audit_inode(NULL, inode); err = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt)) goto out_putf; err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -539,11 +548,11 @@ asmlinkage long sys_fchmodat(int dfd, co error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto dput_and_out; + inode = nd.dentry->d_inode; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -568,7 +577,8 @@ asmlinkage long sys_chmod(const char __u return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct dentry *dentry, struct vfsmount *mnt, + uid_t user, gid_t group) { struct inode * inode; int error; @@ -580,7 +590,7 @@ static int chown_common(struct dentry * goto out; } error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -588,11 +598,11 @@ static int chown_common(struct dentry * newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = dx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= 
ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = dx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -611,7 +621,11 @@ asmlinkage long sys_chown(const char __u error = user_path_walk(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -631,7 +645,11 @@ asmlinkage long sys_fchownat(int dfd, co error = __user_walk_fd(dfd, filename, follow, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -645,7 +663,11 @@ asmlinkage long sys_lchown(const char __ error = user_path_walk_link(filename, &nd); if (error) goto out; - error = chown_common(nd.dentry, user, group); +#ifdef CONFIG_VSERVER_COWBL + error = cow_check_and_break(&nd); + if (!error) +#endif + error = chown_common(nd.dentry, nd.mnt, user, group); path_release(&nd); out: return error; @@ -664,7 +686,7 @@ asmlinkage long sys_fchown(unsigned int dentry = file->f_dentry; audit_inode(NULL, dentry->d_inode); - error = chown_common(dentry, user, group); + error = chown_common(dentry, file->f_vfsmnt, user, group); fput(file); out: return error; @@ -892,6 +914,7 @@ repeat: FD_SET(fd, fdt->open_fds); FD_CLR(fd, fdt->close_on_exec); files->next_fd = fd + 1; + vx_openfd_inc(fd); #if 1 /* Sanity check */ if (fdt->fd[fd] != NULL) { @@ -914,6 +937,7 @@ static void __put_unused_fd(struct files __FD_CLR(fd, fdt->open_fds); if (fd < files->next_fd) files->next_fd = fd; + vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) diff -NurpP --minimal linux-2.6.19/fs/proc/array.c linux-2.6.19-vs2.1.x-t1/fs/proc/array.c --- 
linux-2.6.19/fs/proc/array.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/array.c 2006-11-08 20:12:16 +0100 @@ -75,6 +75,8 @@ #include #include #include +#include +#include #include #include @@ -135,7 +137,9 @@ static const char *task_state_array[] = "T (stopped)", /* 4 */ "T (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "X (dead)", /* 32 */ + "N (noninteractive)", /* 64 */ + "H (on hold)" /* 128 */ }; static inline const char * get_task_state(struct task_struct *tsk) @@ -144,7 +148,8 @@ static inline const char * get_task_stat TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_STOPPED | - TASK_TRACED)) | + TASK_TRACED | + TASK_ONHOLD)) | (tsk->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)); const char **p = &task_state_array[0]; @@ -161,8 +166,14 @@ static inline char * task_state(struct t struct group_info *group_info; int g; struct fdtable *fdt = NULL; + pid_t pid, ptgid, tppid, tgid; rcu_read_lock(); + tgid = vx_map_tgid(p->tgid); + pid = vx_map_pid(p->pid); + ptgid = vx_map_pid(rcu_dereference(p->real_parent)->tgid); + tppid = vx_map_pid(rcu_dereference(p->parent)->pid); + buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -174,9 +185,8 @@ static inline char * task_state(struct t "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, p->pid, - pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, - pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, + tgid, pid, (pid > 1) ? ptgid : 0, + pid_alive(p) && p->ptrace ? 
tppid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -283,17 +293,26 @@ static inline char * task_sig(struct tas static inline char *task_cap(struct task_struct *p, char *buffer) { - return buffer + sprintf(buffer, "CapInh:\t%016x\n" - "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", - cap_t(p->cap_inheritable), - cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + struct vx_info *vxi = p->vx_info; + + return buffer + sprintf(buffer, + "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + (unsigned)vx_info_mbcap(vxi, p->cap_inheritable), + (unsigned)vx_info_mbcap(vxi, p->cap_permitted), + (unsigned)vx_info_mbcap(vxi, p->cap_effective)); } int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -306,6 +325,46 @@ int proc_pid_status(struct task_struct * buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); + + if (task_vx_flags(task, VXF_HIDE_VINFO, 0)) + goto skip; +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,(unsigned long long)vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); +#else + buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task)); +#endif +#ifdef CONFIG_VSERVER_LEGACYNET + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; i < nxi->nbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf
(buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif +skip: #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif @@ -320,7 +379,7 @@ static int do_task_stat(struct task_stru sigset_t sigign, sigcatch; char state; int res; - pid_t ppid = 0, pgid = -1, sid = -1; + pid_t pid = 0, ppid = 0, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long start_time; @@ -390,7 +449,10 @@ static int do_task_stat(struct task_stru sid = sig->session; pgid = process_group(task); - ppid = rcu_dereference(task->real_parent)->tgid; + pid = vx_info_map_pid(task->vx_info, task->pid); + ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info, + rcu_dereference(task->real_parent)->tgid); + pgid = vx_info_map_pid(task->vx_info, pgid); unlock_task_sighand(task, &flags); } @@ -418,10 +480,21 @@ static int do_task_stat(struct task_stru /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); + /* fixup start time for virt uptime */ + if (vx_flags(VXF_VIRT_UPTIME, 0)) { + unsigned long long bias = + current->vx_info->cvirt.bias_clock; + + if (start_time > bias) + start_time -= bias; + else + start_time = 0; + } + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", - task->pid, + pid, tcomm, state, ppid, diff -NurpP --minimal linux-2.6.19/fs/proc/base.c linux-2.6.19-vs2.1.x-t1/fs/proc/base.c --- linux-2.6.19/fs/proc/base.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/base.c 2006-11-30 20:55:45 +0100 @@ -73,6 +73,9 @@ #include #include #include +#include +#include + #include "internal.h" /* NOTE: @@ -971,6 +974,8 @@ static struct inode *proc_pid_make_inode inode->i_uid = task->euid; inode->i_gid = task->egid; } + /* procfs is xid tagged */ + 
inode->i_tag = (tag_t)vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -1023,7 +1028,15 @@ static int pid_revalidate(struct dentry { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + int ret = 0; + if (task) { + int pid = (inode->i_ino >> 16) & 0xFFFF; + + if (!proc_pid_visible(task, pid)) + goto out_put; + + ret = 1; if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { inode->i_uid = task->euid; @@ -1034,11 +1047,11 @@ static int pid_revalidate(struct dentry } inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); + out_put: put_task_struct(task); - return 1; } d_drop(dentry); - return 0; + return ret; } static int pid_delete_dentry(struct dentry * dentry) @@ -1404,6 +1417,13 @@ static struct dentry *proc_pident_lookup if (!task) goto out_no_task; + /* FIXME: maybe we can come up with a generic approach? */ + if (task_vx_flags(task, VXF_HIDE_VINFO, 0) && + (dentry->d_name.len == 5) && + (!memcmp(dentry->d_name.name, "vinfo", 5) || + !memcmp(dentry->d_name.name, "ninfo", 5))) + goto out; + /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc// without very good reasons. 
@@ -1608,14 +1628,14 @@ static int proc_self_readlink(struct den int buflen) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", vx_map_tgid(current->tgid)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -1749,6 +1769,9 @@ static int proc_base_fill_cache(struct f static struct file_operations proc_task_operations; static struct inode_operations proc_task_inode_operations; +extern int proc_pid_vx_info(struct task_struct *, char *); +extern int proc_pid_nx_info(struct task_struct *, char *); + static struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), @@ -1786,6 +1809,8 @@ static struct pid_entry tgid_base_stuff[ #ifdef CONFIG_CPUSETS REG("cpuset", S_IRUGO, cpuset), #endif + INF("vinfo", S_IRUGO, pid_vx_info), + INF("ninfo", S_IRUGO, pid_nx_info), INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -1927,7 +1952,7 @@ struct dentry *proc_pid_lookup(struct in goto out; rcu_read_lock(); - task = find_task_by_pid(tgid); + task = find_proc_task_by_pid(tgid); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2131,9 +2156,11 @@ static struct dentry *proc_task_lookup(s tid = name_to_int(dentry); if (tid == ~0U) goto out; + if (vx_current_initpid(tid)) + goto out; rcu_read_lock(); - task = find_task_by_pid(tid); + task = find_proc_task_by_pid(tid); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2171,7 +2198,7 @@ static struct task_struct *first_tid(str rcu_read_lock(); /* Attempt to start with the pid of a thread */ if (tid && (nr > 0)) { - pos = find_task_by_pid(tid); + pos = find_proc_task_by_pid(tid); if (pos && (pos->group_leader == leader)) goto found; } @@ -2268,7 +2295,10 
@@ static int proc_task_readdir(struct file for (task = first_tid(leader, tid, pos - 2); task; task = next_tid(task), pos++) { - tid = task->pid; + tid = vx_map_pid(task->pid); + /* FIXME: should go away now! */ + if (!proc_pid_visible(task, tid)) + continue; if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ diff -NurpP --minimal linux-2.6.19/fs/proc/generic.c linux-2.6.19-vs2.1.x-t1/fs/proc/generic.c --- linux-2.6.19/fs/proc/generic.c 2006-06-18 04:54:45 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/generic.c 2006-11-08 04:57:41 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -395,12 +396,17 @@ struct dentry *proc_lookup(struct inode for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + /* generic proc entries belong to the host; proc_get_inode() may fail */ + if (inode) + inode->i_tag = 0; spin_lock(&proc_subdir_lock); break; } @@ -476,12 +482,15 @@ int proc_readdir(struct file * filp, } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; /* filldir passes info to user space */ spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; spin_lock(&proc_subdir_lock); + skip: filp->f_pos++; de = de->next; } while (de); @@ -604,6 +613,7 @@ static struct proc_dir_entry *proc_creat ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -624,7 +634,8 @@ struct proc_dir_entry *proc_symlink(cons kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; diff -NurpP --minimal
linux-2.6.19/fs/proc/inode.c linux-2.6.19-vs2.1.x-t1/fs/proc/inode.c --- linux-2.6.19/fs/proc/inode.c 2006-09-20 16:58:35 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/inode.c 2006-11-08 04:57:41 +0100 @@ -168,6 +168,8 @@ struct inode *proc_get_inode(struct supe inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff -NurpP --minimal linux-2.6.19/fs/proc/internal.h linux-2.6.19-vs2.1.x-t1/fs/proc/internal.h --- linux-2.6.19/fs/proc/internal.h 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/internal.h 2006-11-08 04:57:41 +0100 @@ -10,6 +10,7 @@ */ #include +#include struct vmalloc_info { unsigned long used; @@ -58,7 +59,7 @@ static inline struct pid *proc_pid(struc static inline struct task_struct *get_proc_task(struct inode *inode) { - return get_pid_task(proc_pid(inode), PIDTYPE_PID); + return vx_get_proc_task(inode, proc_pid(inode)); } static inline int proc_fd(struct inode *inode) diff -NurpP --minimal linux-2.6.19/fs/proc/proc_misc.c linux-2.6.19-vs2.1.x-t1/fs/proc/proc_misc.c --- linux-2.6.19/fs/proc/proc_misc.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/proc_misc.c 2006-11-14 03:17:51 +0100 @@ -46,6 +46,7 @@ #include #include #include +// #include #include #include #include @@ -53,6 +54,8 @@ #include #include "internal.h" +#include + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -82,17 +85,32 @@ static int proc_calc_metrics(char *page, static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { + unsigned int running, threads; int a, b, c; int len; - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + if (vx_flags(VXF_VIRT_LOAD, 0)) { + struct vx_info *vxi = current->vx_info; + + a = vxi->cvirt.load[0] + 
(FIXED_1/200); + b = vxi->cvirt.load[1] + (FIXED_1/200); + c = vxi->cvirt.load[2] + (FIXED_1/200); + + running = atomic_read(&vxi->cvirt.nr_running); + threads = atomic_read(&vxi->cvirt.nr_threads); + } else { + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + + running = nr_running(); + threads = nr_threads; + } + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, init_pspace.last_pid); + running, threads, init_pspace.last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -106,6 +124,9 @@ static int uptime_read_proc(char *page, do_posix_clock_monotonic_gettime(&uptime); cputime_to_timespec(idletime, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), @@ -142,7 +163,7 @@ static int meminfo_read_proc(char *page, cached = global_page_state(NR_FILE_PAGES) - total_swapcache_pages - i.bufferram; - if (cached < 0) + if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0)) cached = 0; get_vmalloc_info(&vmi); @@ -252,8 +273,8 @@ static int version_read_proc(char *page, { int len; - strcpy(page, linux_banner); - len = strlen(page); + len = sprintf(page, vx_linux_banner, + utsname()->release, utsname()->version); return proc_calc_metrics(page, start, off, count, eof, len); } diff -NurpP --minimal linux-2.6.19/fs/proc/root.c linux-2.6.19-vs2.1.x-t1/fs/proc/root.c --- linux-2.6.19/fs/proc/root.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/proc/root.c 2006-11-08 04:57:41 +0100 @@ -25,6 +25,9 @@ struct proc_dir_entry *proc_net, *proc_n #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static int proc_get_sb(struct file_system_type *fs_type, int flags, const char 
*dev_name, void *data, struct vfsmount *mnt) @@ -89,6 +92,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); + proc_vx_init(); } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff -NurpP --minimal linux-2.6.19/fs/quota.c linux-2.6.19-vs2.1.x-t1/fs/quota.c --- linux-2.6.19/fs/quota.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/quota.c 2006-11-30 19:41:26 +0100 @@ -17,47 +17,122 @@ #include #include #include +#include +#include +#include + + +/* Dquota Hash Management Functions */ + +static LIST_HEAD(dqhash_list); + +struct dqhash *new_dqhash(struct super_block *sb, unsigned int id) +{ + struct dqhash *hash; + int err; + + err = -ENOMEM; + hash = kmalloc(sizeof(struct dqhash), GFP_USER); + if (!hash) + goto out; + + memset(hash, 0, sizeof(struct dqhash)); + hash->dqh_id = id; + atomic_set(&hash->dqh_count, 1); + + INIT_LIST_HEAD(&hash->dqh_list); + + mutex_init(&hash->dqh_dqopt.dqio_mutex); + mutex_init(&hash->dqh_dqopt.dqonoff_mutex); + init_rwsem(&hash->dqh_dqopt.dqptr_sem); + hash->dqh_qop = sb->s_qop; + hash->dqh_qcop = sb->s_qcop; + hash->dqh_sb = sb; + + lock_kernel(); + list_add(&hash->dqh_list, &dqhash_list); + unlock_kernel(); + vxdprintk(VXD_CBIT(misc, 0), + "new_dqhash: %p [#0x%08x]", hash, hash->dqh_id); + return hash; + + // kfree(hash); +out: + return ERR_PTR(err); +} + +void destroy_dqhash(struct dqhash *hash) +{ + vxdprintk(VXD_CBIT(misc, 0), + "destroy_dqhash: %p [#0x%08x] c=%d", + hash, hash->dqh_id, atomic_read(&hash->dqh_count)); + lock_kernel(); + list_del_init(&hash->dqh_list); + unlock_kernel(); + kfree(hash); +} + + +struct dqhash *find_dqhash(unsigned int id) +{ + struct list_head *head; + struct dqhash *hash; + + lock_kernel(); + list_for_each(head, &dqhash_list) { + hash = list_entry(head, struct dqhash, dqh_list); + if (hash->dqh_id == id) + goto dqh_found; + } + unlock_kernel(); + return NULL; + +dqh_found: + unlock_kernel(); 
+ return dqhget(hash); +} + /* Check validity of generic quotactl commands */ -static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int generic_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { if (type >= MAXQUOTAS) return -EINVAL; - if (!sb && cmd != Q_SYNC) + if (!hash && cmd != Q_SYNC) return -ENODEV; /* Is operation supported? */ - if (sb && !sb->s_qcop) + if (hash && !hash->dqh_qcop) return -ENOSYS; switch (cmd) { case Q_GETFMT: break; case Q_QUOTAON: - if (!sb->s_qcop->quota_on) + if (!hash->dqh_qcop->quota_on) return -ENOSYS; break; case Q_QUOTAOFF: - if (!sb->s_qcop->quota_off) + if (!hash->dqh_qcop->quota_off) return -ENOSYS; break; case Q_SETINFO: - if (!sb->s_qcop->set_info) + if (!hash->dqh_qcop->set_info) return -ENOSYS; break; case Q_GETINFO: - if (!sb->s_qcop->get_info) + if (!hash->dqh_qcop->get_info) return -ENOSYS; break; case Q_SETQUOTA: - if (!sb->s_qcop->set_dqblk) + if (!hash->dqh_qcop->set_dqblk) return -ENOSYS; break; case Q_GETQUOTA: - if (!sb->s_qcop->get_dqblk) + if (!hash->dqh_qcop->get_dqblk) return -ENOSYS; break; case Q_SYNC: - if (sb && !sb->s_qcop->quota_sync) + if (hash && !hash->dqh_qcop->quota_sync) return -ENOSYS; break; default: @@ -73,7 +148,7 @@ static int generic_quotactl_valid(struct case Q_SETQUOTA: case Q_GETQUOTA: /* This is just informative test so we are satisfied without a lock */ - if (!sb_has_quota_enabled(sb, type)) + if (!dqh_has_quota_enabled(hash, type)) return -ESRCH; } @@ -81,47 +156,47 @@ static int generic_quotactl_valid(struct if (cmd == Q_GETQUOTA) { if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; return 0; } /* Check validity of XFS Quota Manager commands */ 
-static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int xqm_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { if (type >= XQM_MAXQUOTAS) return -EINVAL; - if (!sb) + if (!hash) return -ENODEV; - if (!sb->s_qcop) + if (!hash->dqh_qcop) return -ENOSYS; switch (cmd) { case Q_XQUOTAON: case Q_XQUOTAOFF: case Q_XQUOTARM: - if (!sb->s_qcop->set_xstate) + if (!hash->dqh_qcop->set_xstate) return -ENOSYS; break; case Q_XGETQSTAT: - if (!sb->s_qcop->get_xstate) + if (!hash->dqh_qcop->get_xstate) return -ENOSYS; break; case Q_XSETQLIM: - if (!sb->s_qcop->set_xquota) + if (!hash->dqh_qcop->set_xquota) return -ENOSYS; break; case Q_XGETQUOTA: - if (!sb->s_qcop->get_xquota) + if (!hash->dqh_qcop->get_xquota) return -ENOSYS; break; case Q_XQUOTASYNC: - if (!sb->s_qcop->quota_sync) + if (!hash->dqh_qcop->quota_sync) return -ENOSYS; break; default: @@ -132,57 +207,68 @@ static int xqm_quotactl_valid(struct sup if (cmd == Q_XGETQUOTA) { if (((type == XQM_USRQUOTA && current->euid != id) || (type == XQM_GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) + !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) { - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return -EPERM; } return 0; } -static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) +static int check_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id) { int error; if (XQM_COMMAND(cmd)) - error = xqm_quotactl_valid(sb, type, cmd, id); + error = xqm_quotactl_valid(hash, type, cmd, id); else - error = generic_quotactl_valid(sb, type, cmd, id); + error = generic_quotactl_valid(hash, type, cmd, id); if (!error) - error = security_quotactl(cmd, type, id, sb); + error = security_quotactl(cmd, type, id, hash); return error; } -static void quota_sync_sb(struct super_block *sb, int type) +static void quota_sync_sb(struct super_block *sb) { 
- int cnt; - struct inode *discard[MAXQUOTAS]; - - sb->s_qcop->quota_sync(sb, type); /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); +} + +static void quota_sync_dqh(struct dqhash *hash, int type) +{ + int cnt; + struct inode *discard[MAXQUOTAS]; + + vxdprintk(VXD_CBIT(quota, 1), + "quota_sync_dqh(%p,%d)", hash, type); + hash->dqh_qcop->quota_sync(hash, type); + + quota_sync_sb(hash->dqh_sb); /* Now when everything is written we can discard the pagecache so * that userspace sees the changes. We need i_mutex and so we could * not do it inside dqonoff_mutex. Moreover we need to be carefull * about races with quotaoff() (that is the reason why we have own * reference to inode). */ - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_lock(&dqh_dqopt(hash)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { discard[cnt] = NULL; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!dqh_has_quota_enabled(hash, cnt)) continue; - discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); + vxdprintk(VXD_CBIT(quota, 0), + "quota_sync_dqh(%p,%d) discard inode %p", + hash, type, dqh_dqopt(hash)->files[cnt]); + discard[cnt] = igrab(dqh_dqopt(hash)->files[cnt]); } - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + mutex_unlock(&dqh_dqopt(hash)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (discard[cnt]) { mutex_lock(&discard[cnt]->i_mutex); @@ -193,67 +279,59 @@ static void quota_sync_sb(struct super_b } } -void sync_dquots(struct super_block *sb, int type) +void sync_dquots_dqh(struct dqhash *hash, int type) { - int cnt, dirty; + vxdprintk(VXD_CBIT(quota, 1), + "sync_dquots_dqh(%p,%d)", hash, type); - if (sb) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - return; - } + if (hash->dqh_qcop->quota_sync) + 
quota_sync_dqh(hash, type); +} - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root && sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; +void sync_dquots(struct dqhash *hash, int type) + +{ + vxdprintk(VXD_CBIT(quota, 1), + "sync_dquots(%p,%d)", hash, type); + + if (hash) { + if (hash->dqh_qcop->quota_sync) + quota_sync_dqh(hash, type); + return; } - spin_unlock(&sb_lock); } /* Copy parameters and call proper function */ -static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr) +static int do_quotactl(struct dqhash *hash, int type, int cmd, qid_t id, void __user *addr) { int ret; + vxdprintk(VXD_CBIT(quota, 3), + "do_quotactl(%p,%d,cmd=%d,id=%d,%p)", hash, type, cmd, id, addr); + switch (cmd) { case Q_QUOTAON: { char *pathname; if (IS_ERR(pathname = getname(addr))) return PTR_ERR(pathname); - ret = sb->s_qcop->quota_on(sb, type, id, pathname); + ret = hash->dqh_qcop->quota_on(hash, type, id, pathname); putname(pathname); return ret; } case Q_QUOTAOFF: - return sb->s_qcop->quota_off(sb, type); + return hash->dqh_qcop->quota_off(hash, type); case Q_GETFMT: { __u32 fmt; - down_read(&sb_dqopt(sb)->dqptr_sem); - if (!sb_has_quota_enabled(sb, type)) { - up_read(&sb_dqopt(sb)->dqptr_sem); + down_read(&dqh_dqopt(hash)->dqptr_sem); + if (!dqh_has_quota_enabled(hash, type)) { + up_read(&dqh_dqopt(hash)->dqptr_sem); return -ESRCH; } - fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; - up_read(&sb_dqopt(sb)->dqptr_sem); + fmt = 
dqh_dqopt(hash)->info[type].dqi_format->qf_fmt_id; + up_read(&dqh_dqopt(hash)->dqptr_sem); if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; @@ -261,7 +339,7 @@ static int do_quotactl(struct super_bloc case Q_GETINFO: { struct if_dqinfo info; - if ((ret = sb->s_qcop->get_info(sb, type, &info))) + if ((ret = hash->dqh_qcop->get_info(hash, type, &info))) return ret; if (copy_to_user(addr, &info, sizeof(info))) return -EFAULT; @@ -272,12 +350,12 @@ static int do_quotactl(struct super_bloc if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; - return sb->s_qcop->set_info(sb, type, &info); + return hash->dqh_qcop->set_info(hash, type, &info); } case Q_GETQUOTA: { struct if_dqblk idq; - if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq))) + if ((ret = hash->dqh_qcop->get_dqblk(hash, type, id, &idq))) return ret; if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; @@ -288,10 +366,10 @@ static int do_quotactl(struct super_bloc if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; - return sb->s_qcop->set_dqblk(sb, type, id, &idq); + return hash->dqh_qcop->set_dqblk(hash, type, id, &idq); } case Q_SYNC: - sync_dquots(sb, type); + sync_dquots(hash, type); return 0; case Q_XQUOTAON: @@ -301,12 +379,12 @@ static int do_quotactl(struct super_bloc if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; - return sb->s_qcop->set_xstate(sb, flags, cmd); + return hash->dqh_qcop->set_xstate(hash, flags, cmd); } case Q_XGETQSTAT: { struct fs_quota_stat fqs; - if ((ret = sb->s_qcop->get_xstate(sb, &fqs))) + if ((ret = hash->dqh_qcop->get_xstate(hash, &fqs))) return ret; if (copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; @@ -317,19 +395,19 @@ static int do_quotactl(struct super_bloc if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; - return sb->s_qcop->set_xquota(sb, type, id, &fdq); + return hash->dqh_qcop->set_xquota(hash, type, id, &fdq); } case Q_XGETQUOTA: { struct fs_disk_quota fdq; - if ((ret = 
sb->s_qcop->get_xquota(sb, type, id, &fdq))) + if ((ret = hash->dqh_qcop->get_xquota(hash, type, id, &fdq))) return ret; if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return 0; } case Q_XQUOTASYNC: - return sb->s_qcop->quota_sync(sb, type); + return hash->dqh_qcop->quota_sync(hash, type); /* We never reach here unless validity check is broken */ default: BUG(); @@ -375,6 +453,7 @@ asmlinkage long sys_quotactl(unsigned in { uint cmds, type; struct super_block *sb = NULL; + struct dqhash *dqh = NULL; int ret; cmds = cmd >> SUBCMDSHIFT; @@ -386,9 +465,11 @@ asmlinkage long sys_quotactl(unsigned in return PTR_ERR(sb); } - ret = check_quotactl_valid(sb, type, cmds, id); + if (sb) + dqh = sb->s_dqh; + ret = check_quotactl_valid(dqh, type, cmds, id); if (ret >= 0) - ret = do_quotactl(sb, type, cmds, id, addr); + ret = do_quotactl(dqh, type, cmds, id, addr); if (sb) drop_super(sb); diff -NurpP --minimal linux-2.6.19/fs/quota_v1.c linux-2.6.19-vs2.1.x-t1/fs/quota_v1.c --- linux-2.6.19/fs/quota_v1.c 2005-03-02 12:38:45 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/quota_v1.c 2006-11-08 04:57:51 +0100 @@ -42,12 +42,13 @@ static int v1_read_dqblk(struct dquot *d int type = dquot->dq_type; struct v1_disk_dqblk dqblk; - if (!sb_dqopt(dquot->dq_sb)->files[type]) + if (!dqh_dqopt(dquot->dq_dqh)->files[type]) return -EINVAL; /* Set structure to 0s in case read fails/is after end of file */ memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); - dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); + dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 && @@ -66,16 +67,16 @@ static int v1_commit_dqblk(struct dquot v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); if (dquot->dq_id == 0) { - dqblk.dqb_btime = 
sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; - dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace; + dqblk.dqb_btime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_bgrace; + dqblk.dqb_itime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_igrace; } ret = 0; - if (sb_dqopt(dquot->dq_sb)->files[type]) - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk, - sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); + if (dqh_dqopt(dquot->dq_dqh)->files[type]) + ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); if (ret != sizeof(struct v1_disk_dqblk)) { printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", - dquot->dq_sb->s_id); + dquot->dq_dqh->dqh_sb->s_id); if (ret >= 0) ret = -EIO; goto out; @@ -100,9 +101,9 @@ struct v2_disk_dqheader { __le32 dqh_version; /* File version */ }; -static int v1_check_quota_file(struct super_block *sb, int type) +static int v1_check_quota_file(struct dqhash *hash, int type) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; ulong blocks; size_t off; struct v2_disk_dqheader dqhead; @@ -118,22 +119,26 @@ static int v1_check_quota_file(struct su if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk)) return 0; /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */ - size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) return 1; /* Probably not new format */ if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type]) return 1; /* Definitely not new format */ - printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. 
It probably contains newer quota format.\n", sb->s_id); + printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file." + " It probably contains newer quota format.\n", hash->dqh_sb->s_id); return 0; /* Seems like a new format file -> refuse it */ } -static int v1_read_file_info(struct super_block *sb, int type) +static int v1_read_file_info(struct dqhash *hash, int type) { - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct v1_disk_dqblk dqblk; int ret; - if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { + if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqblk, sizeof(struct v1_disk_dqblk), + v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { if (ret >= 0) ret = -EIO; goto out; @@ -145,14 +150,14 @@ out: return ret; } -static int v1_write_file_info(struct super_block *sb, int type) +static int v1_write_file_info(struct dqhash *hash, int type) { - struct quota_info *dqopt = sb_dqopt(sb); + struct quota_info *dqopt = dqh_dqopt(hash); struct v1_disk_dqblk dqblk; int ret; dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY; - if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, + if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) { if (ret >= 0) ret = -EIO; @@ -160,7 +165,7 @@ static int v1_write_file_info(struct sup } dqblk.dqb_itime = dqopt->info[type].dqi_igrace; dqblk.dqb_btime = dqopt->info[type].dqi_bgrace; - ret = sb->s_op->quota_write(sb, type, (char *)&dqblk, + ret = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0)); if (ret == sizeof(struct v1_disk_dqblk)) ret = 0; diff -NurpP --minimal linux-2.6.19/fs/quota_v2.c linux-2.6.19-vs2.1.x-t1/fs/quota_v2.c --- linux-2.6.19/fs/quota_v2.c 2006-06-18 04:54:47 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/quota_v2.c 
2006-11-08 04:57:51 +0100 @@ -26,14 +26,15 @@ typedef char *dqbuf_t; #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) /* Check whether given file is really vfsv0 quotafile */ -static int v2_check_quota_file(struct super_block *sb, int type) +static int v2_check_quota_file(struct dqhash *hash, int type) { struct v2_disk_dqheader dqhead; ssize_t size; static const uint quota_magics[] = V2_INITQMAGICS; static const uint quota_versions[] = V2_INITQVERSIONS; - size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) { printk("quota_v2: failed read expected=%zd got=%zd\n", sizeof(struct v2_disk_dqheader), size); @@ -46,17 +47,17 @@ static int v2_check_quota_file(struct su } /* Read information header from quota file */ -static int v2_read_file_info(struct super_block *sb, int type) +static int v2_read_file_info(struct dqhash *hash, int type) { struct v2_disk_dqinfo dinfo; - struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct mem_dqinfo *info = dqh_dqopt(hash)->info+type; ssize_t size; - size = sb->s_op->quota_read(sb, type, (char *)&dinfo, - sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); + size = hash->dqh_sb->s_op->quota_read(hash, type, + (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't read info structure on device %s.\n", - sb->s_id); + hash->dqh_sb->s_id); return -1; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); @@ -69,10 +70,10 @@ static int v2_read_file_info(struct supe } /* Write information header to quota file */ -static int v2_write_file_info(struct super_block *sb, int type) +static int v2_write_file_info(struct dqhash *hash, int type) { struct v2_disk_dqinfo dinfo; - struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct 
mem_dqinfo *info = dqh_dqopt(hash)->info+type; ssize_t size; spin_lock(&dq_data_lock); @@ -84,11 +85,11 @@ static int v2_write_file_info(struct sup dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); - size = sb->s_op->quota_write(sb, type, (char *)&dinfo, + size = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't write info structure on device %s.\n", - sb->s_id); + hash->dqh_sb->s_id); return -1; } return 0; @@ -132,24 +133,24 @@ static inline void freedqbuf(dqbuf_t buf kfree(buf); } -static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) +static inline ssize_t read_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf) { memset(buf, 0, V2_DQBLKSIZE); - return sb->s_op->quota_read(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); + return hash->dqh_sb->s_op->quota_read(hash, type, + (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); } -static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) +static inline ssize_t write_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf) { - return sb->s_op->quota_write(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); + return hash->dqh_sb->s_op->quota_write(hash, type, + (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); } /* Remove empty block from list and return it */ -static int get_free_dqblk(struct super_block *sb, int type) +static int get_free_dqblk(struct dqhash *hash, int type) { dqbuf_t buf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int ret, blk; @@ -157,18 +158,18 @@ static int get_free_dqblk(struct super_b return 
-ENOMEM; if (info->u.v2_i.dqi_free_blk) { blk = info->u.v2_i.dqi_free_blk; - if ((ret = read_blk(sb, type, blk, buf)) < 0) + if ((ret = read_blk(hash, type, blk, buf)) < 0) goto out_buf; info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); } else { memset(buf, 0, V2_DQBLKSIZE); /* Assure block allocation... */ - if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0) + if ((ret = write_blk(hash, type, info->u.v2_i.dqi_blocks, buf)) < 0) goto out_buf; blk = info->u.v2_i.dqi_blocks++; } - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); ret = blk; out_buf: freedqbuf(buf); @@ -176,9 +177,9 @@ out_buf: } /* Insert empty block to the list */ -static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int put_free_dqblk(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -186,18 +187,18 @@ static int put_free_dqblk(struct super_b dh->dqdh_prev_free = cpu_to_le32(0); dh->dqdh_entries = cpu_to_le16(0); info->u.v2_i.dqi_free_blk = blk; - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); /* Some strange block. We had better leave it... 
*/ - if ((err = write_blk(sb, type, blk, buf)) < 0) + if ((err = write_blk(hash, type, blk, buf)) < 0) return err; return 0; } /* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int remove_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); int err; @@ -205,27 +206,27 @@ static int remove_free_dqentry(struct su if (!tmpbuf) return -ENOMEM; if (nextblk) { - if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0) + if ((err = read_blk(hash, type, nextblk, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free; - if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0) + if ((err = write_blk(hash, type, nextblk, tmpbuf)) < 0) goto out_buf; } if (prevblk) { - if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0) + if ((err = read_blk(hash, type, prevblk, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free; - if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0) + if ((err = write_blk(hash, type, prevblk, tmpbuf)) < 0) goto out_buf; } else { info->u.v2_i.dqi_free_entry = nextblk; - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); } freedqbuf(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); /* No matter whether write succeeds block is out of list */ - if (write_blk(sb, type, blk, buf) < 0) + if (write_blk(hash, type, blk, buf) < 0) printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); return 0; out_buf: @@ -234,10 +235,10 @@ out_buf: } /* Insert given block to the beginning of list with free entries */ -static int 
insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int insert_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct mem_dqinfo *info = dqh_dqinfo(hash, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -245,18 +246,18 @@ static int insert_free_dqentry(struct su return -ENOMEM; dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry); dh->dqdh_prev_free = cpu_to_le32(0); - if ((err = write_blk(sb, type, blk, buf)) < 0) + if ((err = write_blk(hash, type, blk, buf)) < 0) goto out_buf; if (info->u.v2_i.dqi_free_entry) { - if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + if ((err = read_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) goto out_buf; ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk); - if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) + if ((err = write_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) goto out_buf; } freedqbuf(tmpbuf); info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(sb, type); + mark_info_dirty(hash, type); return 0; out_buf: freedqbuf(tmpbuf); @@ -266,8 +267,9 @@ out_buf: /* Find space for dquot */ static uint find_free_dqentry(struct dquot *dquot, int *err) { - struct super_block *sb = dquot->dq_sb; - struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; + struct mem_dqinfo *info = dqh_dqopt(dqh)->info+dquot->dq_type; uint blk, i; struct v2_disk_dqdbheader *dh; struct v2_disk_dqblk *ddquot; @@ -283,11 +285,11 @@ static uint find_free_dqentry(struct dqu ddquot = GETENTRIES(buf); if (info->u.v2_i.dqi_free_entry) { blk = info->u.v2_i.dqi_free_entry; - if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0) + if ((*err = read_blk(dqh, dquot->dq_type, blk, buf)) < 0) goto out_buf; } 
else { - blk = get_free_dqblk(sb, dquot->dq_type); + blk = get_free_dqblk(dqh, dquot->dq_type); if ((int)blk < 0) { *err = blk; freedqbuf(buf); @@ -296,10 +298,10 @@ static uint find_free_dqentry(struct dqu memset(buf, 0, V2_DQBLKSIZE); /* This is enough as block is already zeroed and entry list is empty... */ info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(sb, dquot->dq_type); + mark_info_dirty(dqh, dquot->dq_type); } if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ - if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) { + if ((*err = remove_free_dqentry(dqh, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } @@ -314,7 +316,7 @@ static uint find_free_dqentry(struct dqu goto out_buf; } #endif - if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) { + if ((*err = write_blk(dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); goto out_buf; } @@ -329,7 +331,7 @@ out_buf: /* Insert reference to structure into the trie */ static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) { - struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; dqbuf_t buf; int ret = 0, newson = 0, newact = 0; __le32 *ref; @@ -338,7 +340,7 @@ static int do_insert_tree(struct dquot * if (!(buf = getdqbuf())) return -ENOMEM; if (!*treeblk) { - ret = get_free_dqblk(sb, dquot->dq_type); + ret = get_free_dqblk(dqh, dquot->dq_type); if (ret < 0) goto out_buf; *treeblk = ret; @@ -346,7 +348,7 @@ static int do_insert_tree(struct dquot * newact = 1; } else { - if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) { + if ((ret = read_blk(dqh, dquot->dq_type, *treeblk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk); goto out_buf; } @@ -369,10 +371,10 @@ static int do_insert_tree(struct dquot * ret = 
do_insert_tree(dquot, &newblk, depth+1); if (newson && ret >= 0) { ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk); - ret = write_blk(sb, dquot->dq_type, *treeblk, buf); + ret = write_blk(dqh, dquot->dq_type, *treeblk, buf); } else if (newact && ret < 0) - put_free_dqblk(sb, dquot->dq_type, buf, *treeblk); + put_free_dqblk(dqh, dquot->dq_type, buf, *treeblk); out_buf: freedqbuf(buf); return ret; @@ -409,10 +411,11 @@ static int v2_write_dquot(struct dquot * if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) ddquot.dqb_itime = cpu_to_le64(1); spin_unlock(&dq_data_lock); - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, + ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type, (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off); if (ret != sizeof(struct v2_disk_dqblk)) { - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id); + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", + dquot->dq_dqh->dqh_sb->s_id); if (ret >= 0) ret = -ENOSPC; } @@ -426,7 +429,8 @@ static int v2_write_dquot(struct dquot * /* Free dquot entry in data block */ static int free_dqentry(struct dquot *dquot, uint blk) { - struct super_block *sb = dquot->dq_sb; + // struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; int type = dquot->dq_type; struct v2_disk_dqdbheader *dh; dqbuf_t buf = getdqbuf(); @@ -440,15 +444,15 @@ static int free_dqentry(struct dquot *dq (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS)); goto out_buf; } - if ((ret = read_blk(sb, type, blk, buf)) < 0) { + if ((ret = read_blk(dqh, type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); goto out_buf; } dh = (struct v2_disk_dqdbheader *)buf; dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? 
*/ - if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || - (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { + if ((ret = remove_free_dqentry(dqh, type, buf, blk)) < 0 || + (ret = put_free_dqblk(dqh, type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't move quota data block (%u) " "to free list.\n", blk); goto out_buf; @@ -459,13 +463,13 @@ static int free_dqentry(struct dquot *dq sizeof(struct v2_disk_dqblk)); if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { /* Insert will write block itself */ - if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) { + if ((ret = insert_free_dqentry(dqh, type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); goto out_buf; } } else - if ((ret = write_blk(sb, type, blk, buf)) < 0) { + if ((ret = write_blk(dqh, type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't write quota data " "block %u\n", blk); goto out_buf; @@ -480,7 +484,7 @@ out_buf: /* Remove reference to dquot from tree */ static int remove_tree(struct dquot *dquot, uint *blk, int depth) { - struct super_block *sb = dquot->dq_sb; + struct dqhash *dqh = dquot->dq_dqh; int type = dquot->dq_type; dqbuf_t buf = getdqbuf(); int ret = 0; @@ -489,7 +493,7 @@ static int remove_tree(struct dquot *dqu if (!buf) return -ENOMEM; - if ((ret = read_blk(sb, type, *blk, buf)) < 0) { + if ((ret = read_blk(dqh, type, *blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); goto out_buf; } @@ -506,11 +510,11 @@ static int remove_tree(struct dquot *dqu for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? 
*/ /* Don't put the root block into the free block list */ if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) { - put_free_dqblk(sb, type, buf, *blk); + put_free_dqblk(dqh, type, buf, *blk); *blk = 0; } else - if ((ret = write_blk(sb, type, *blk, buf)) < 0) + if ((ret = write_blk(dqh, type, *blk, buf)) < 0) printk(KERN_ERR "VFS: Can't write quota tree " "block %u.\n", *blk); } @@ -539,7 +543,7 @@ static loff_t find_block_dqentry(struct if (!buf) return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { + if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } @@ -578,7 +582,7 @@ static loff_t find_tree_dqentry(struct d if (!buf) return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { + if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) { printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } @@ -610,7 +614,7 @@ static int v2_read_dquot(struct dquot *d #ifdef __QUOTA_V2_PARANOIA /* Invalidated quota? 
*/ - if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) { + if (!dquot->dq_dqh || !dqh_dqopt(dquot->dq_dqh)->files[type]) { printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); return -EIO; } @@ -627,7 +631,7 @@ static int v2_read_dquot(struct dquot *d } else { dquot->dq_off = offset; - if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, + if ((ret = dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type, (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset)) != sizeof(struct v2_disk_dqblk)) { if (ret >= 0) diff -NurpP --minimal linux-2.6.19/fs/read_write.c linux-2.6.19-vs2.1.x-t1/fs/read_write.c --- linux-2.6.19/fs/read_write.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/read_write.c 2006-11-08 04:57:51 +0100 @@ -703,12 +703,77 @@ sys_writev(unsigned long fd, const struc return ret; } +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos, + size_t count, loff_t max) +{ + struct inode * in_inode, * out_inode; + loff_t pos; + ssize_t ret; + + /* verify in_file */ + in_inode = in_file->f_dentry->d_inode; + if (!in_inode) + return -EINVAL; + if (!in_file->f_op || !in_file->f_op->sendfile) + return -EINVAL; + + if (!ppos) + ppos = &in_file->f_pos; + else + if (!(in_file->f_mode & FMODE_PREAD)) + return -ESPIPE; + + ret = rw_verify_area(READ, in_file, ppos, count); + if (ret < 0) + return ret; + count = ret; + + /* verify out_file */ + out_inode = out_file->f_dentry->d_inode; + if (!out_inode) + return -EINVAL; + if (!out_file->f_op || !out_file->f_op->sendpage) + return -EINVAL; + + ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); + if (ret < 0) + return ret; + count = ret; + + ret = security_file_permission (out_file, MAY_WRITE); + if (ret) + return ret; + + if (!max) + max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); + + pos = *ppos; + if (unlikely(pos < 0)) + return -EINVAL; + if (unlikely(pos + count > max)) { + if (pos >= max) + return -EOVERFLOW; + count = max 
- pos; + } + + ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + + if (ret > 0) { + current->rchar += ret; + current->wchar += ret; + } + + if (*ppos > max) + return -EOVERFLOW; + return ret; +} + +EXPORT_SYMBOL(vfs_sendfile); + static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; - loff_t pos; ssize_t retval; int fput_needed_in, fput_needed_out; @@ -721,22 +786,6 @@ static ssize_t do_sendfile(int out_fd, i goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) - goto fput_in; - retval = -ESPIPE; - if (!ppos) - ppos = &in_file->f_pos; - else - if (!(in_file->f_mode & FMODE_PREAD)) - goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); - if (retval < 0) - goto fput_in; - count = retval; retval = security_file_permission (in_file, MAY_READ); if (retval) @@ -751,45 +800,12 @@ static ssize_t do_sendfile(int out_fd, i goto fput_in; if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; - retval = -EINVAL; - if (!out_file->f_op || !out_file->f_op->sendpage) - goto fput_out; - out_inode = out_file->f_dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); - if (retval < 0) - goto fput_out; - count = retval; - - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - - if (!max) - max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); - - pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; - if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; - if (pos >= max) - goto fput_out; - count = max - pos; - } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + retval = vfs_sendfile(out_file, in_file, ppos, count, max); - if (retval > 0) 
{ - current->rchar += retval; - current->wchar += retval; - } current->syscr++; current->syscw++; - if (*ppos > max) - retval = -EOVERFLOW; - fput_out: fput_light(out_file, fput_needed_out); fput_in: diff -NurpP --minimal linux-2.6.19/fs/reiserfs/bitmap.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/bitmap.c --- linux-2.6.19/fs/reiserfs/bitmap.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/bitmap.c 2006-11-08 04:57:50 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #define PREALLOCATION_SIZE 9 @@ -425,8 +426,10 @@ static void _reiserfs_free_block(struct set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); journal_mark_dirty(th, s, sbh); - if (for_unformatted) + if (for_unformatted) { + DLIMIT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK_NODIRTY(inode, 1); + } } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1034,6 +1037,7 @@ static inline int blocknrs_and_prealloc_ b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; int passno = 0; int nr_allocated = 0; + int blocks; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1043,19 +1047,30 @@ static inline int blocknrs_and_prealloc_ "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); #endif - quota_ret = - DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? 
*/ + quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, + amount_needed); + if (quota_ret) return QUOTA_EXCEEDED; + if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + amount_needed); + return NO_DISK_SPACE; + } + if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG reiserfs_debug(s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = - DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, - hint->prealloc_size); + quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + if (!quota_ret && + DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) { + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, + hint->prealloc_size); + quota_ret = 1; + } if (quota_ret) hint->preallocate = hint->prealloc_size = 0; } @@ -1087,7 +1102,10 @@ static inline int blocknrs_and_prealloc_ nr_allocated, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ + /* Free not allocated blocks */ + blocks = amount_needed + hint->prealloc_size - nr_allocated; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } while (nr_allocated--) reiserfs_free_block(hint->th, hint->inode, @@ -1118,10 +1136,10 @@ static inline int blocknrs_and_prealloc_ REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); + blocks = amount_needed + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count; + DLIMIT_FREE_BLOCK(hint->inode, blocks); + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks); } return CARRY_ON; diff -NurpP --minimal linux-2.6.19/fs/reiserfs/file.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/file.c --- linux-2.6.19/fs/reiserfs/file.c 2006-11-30 21:19:28 
+0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/file.c 2006-11-30 20:55:45 +0100 @@ -1575,6 +1575,7 @@ const struct file_operations reiserfs_fi .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, @@ -1589,4 +1590,5 @@ struct inode_operations reiserfs_file_in .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/reiserfs/inode.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/inode.c --- linux-2.6.19/fs/reiserfs/inode.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/inode.c 2006-11-30 18:53:18 +0100 @@ -16,6 +16,8 @@ #include #include #include +#include +#include static int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); @@ -50,6 +52,7 @@ void reiserfs_delete_inode(struct inode * stat data deletion */ if (!err) DQUOT_FREE_INODE(inode); + DLIMIT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; @@ -1114,6 +1117,8 @@ static void init_inode(struct inode *ino struct buffer_head *bh; struct item_head *ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER(path); @@ -1136,12 +1141,13 @@ static void init_inode(struct inode *ino (struct stat_data_v1 *)B_I_PITEM(bh, ih); unsigned long blocks; + uid = sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version(inode, KEY_FORMAT_3_5); set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1183,11 +1189,12 @@ static void init_inode(struct inode *ino // 
(directories and symlinks) struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1217,6 +1224,10 @@ static void init_inode(struct inode *ino sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); } + inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); + inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); + inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); + pathrelse(path); if (S_ISREG(inode->i_mode)) { inode->i_op = &reiserfs_file_inode_operations; @@ -1239,13 +1250,15 @@ static void init_inode(struct inode *ino static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; + uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); + gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); __u16 flags; + set_sd_v2_uid(sd_v2, uid); + set_sd_v2_gid(sd_v2, gid); set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); @@ -1776,6 +1789,10 @@ int reiserfs_new_inode(struct reiserfs_t BUG_ON(!th->t_trans_id); + if (DLIMIT_ALLOC_INODE(inode)) { + err = -ENOSPC; + goto out_bad_dlimit; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto out_end_trans; @@ -1960,6 +1977,9 @@ int reiserfs_new_inode(struct reiserfs_t DQUOT_FREE_INODE(inode); out_end_trans: + DLIMIT_FREE_INODE(inode); + + out_bad_dlimit: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs more credits 
so it's better to have it outside */ DQUOT_DROP(inode); @@ -2699,6 +2719,14 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (sd_attrs & REISERFS_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (sd_attrs & REISERFS_BARRIER_FL) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (sd_attrs & REISERFS_APPEND_FL) inode->i_flags |= S_APPEND; else @@ -2721,6 +2749,14 @@ void i_attrs_to_sd_attrs(struct inode *i *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if (inode->i_flags & S_IUNLINK) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if (inode->i_flags & S_BARRIER) + *sd_attrs |= REISERFS_BARRIER_FL; + else + *sd_attrs &= ~REISERFS_BARRIER_FL; if (inode->i_flags & S_SYNC) *sd_attrs |= REISERFS_SYNC_FL; else @@ -2900,6 +2936,22 @@ static ssize_t reiserfs_direct_IO(int rw reiserfs_get_blocks_direct_io, NULL); } +int reiserfs_sync_flags(struct inode *inode) +{ + u16 oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_attrs; + newflags = oldflags; + i_attrs_to_sd_attrs(inode, &newflags); + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_attrs = newflags; + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + } + return 0; +} + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -2949,9 +3001,11 @@ int reiserfs_setattr(struct dentry *dent } error = inode_change_ok(inode, attr); + if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || + (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { error = reiserfs_chown_xattrs(inode, attr); if (!error) { @@ -2981,6 +3035,9 @@ int reiserfs_setattr(struct dentry *dent inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) 
inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_TAG) && + IS_TAGGED(inode)) + inode->i_tag = attr->ia_tag; mark_inode_dirty(inode); error = journal_end(&th, inode->i_sb, jbegin_count); diff -NurpP --minimal linux-2.6.19/fs/reiserfs/ioctl.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/ioctl.c --- linux-2.6.19/fs/reiserfs/ioctl.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/ioctl.c 2006-11-08 04:57:52 +0100 @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -24,7 +25,7 @@ static int reiserfs_unpack(struct inode int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -43,12 +44,14 @@ int reiserfs_ioctl(struct inode *inode, flags = REISERFS_I(inode)->i_attrs; i_attrs_to_sd_attrs(inode, (__u16 *) & flags); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int __user *)arg); case REISERFS_IOC_SETFLAGS:{ if (!reiserfs_attrs(inode->i_sb)) return -ENOTTY; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if ((current->fsuid != inode->i_uid) @@ -58,10 +61,12 @@ int reiserfs_ioctl(struct inode *inode, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) + oldflags = REISERFS_I(inode) -> i_attrs; + if (((oldflags & REISERFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL))) && + !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if ((flags & REISERFS_NOTAIL_FL) && @@ -72,6 +77,9 @@ int reiserfs_ioctl(struct inode *inode, if (result) return result; } + + flags = flags & REISERFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; sd_attrs_to_i_attrs(flags, inode); REISERFS_I(inode)->i_attrs = flags; inode->i_ctime = 
CURRENT_TIME_SEC; @@ -83,7 +91,8 @@ int reiserfs_ioctl(struct inode *inode, case REISERFS_IOC_SETVERSION: if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || + (filp && MNT_IS_RDONLY(filp->f_vfsmnt))) return -EROFS; if (get_user(inode->i_generation, (int __user *)arg)) return -EFAULT; diff -NurpP --minimal linux-2.6.19/fs/reiserfs/namei.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/namei.c --- linux-2.6.19/fs/reiserfs/namei.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/namei.c 2006-11-08 04:57:47 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); @@ -361,6 +362,7 @@ static struct dentry *reiserfs_lookup(st reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); } + dx_propagate_tag(nd, inode); /* Propogate the priv_object flag so we know we're in the priv tree */ if (is_reiserfs_priv_object(dir)) @@ -596,6 +598,7 @@ static int new_inode_init(struct inode * } else { inode->i_gid = current->fsgid; } + inode->i_tag = dx_current_fstag(inode->i_sb); DQUOT_INIT(inode); return 0; } @@ -1542,6 +1545,7 @@ struct inode_operations reiserfs_dir_ino .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; /* @@ -1558,6 +1562,7 @@ struct inode_operations reiserfs_symlink .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; @@ -1571,5 +1576,6 @@ struct inode_operations reiserfs_special .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .sync_flags = reiserfs_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/reiserfs/stree.c 
linux-2.6.19-vs2.1.x-t1/fs/reiserfs/stree.c --- linux-2.6.19/fs/reiserfs/stree.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/stree.c 2006-11-08 04:57:50 +0100 @@ -56,6 +56,7 @@ #include #include #include +#include /* Does the buffer contain a disk block which is in the tree. */ inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) @@ -1297,6 +1298,7 @@ int reiserfs_delete_item(struct reiserfs "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); /* Return deleted body length */ @@ -1385,6 +1387,7 @@ void reiserfs_delete_solid_item(struct r #endif DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + DLIMIT_FREE_SPACE(inode, quota_cut_bytes); } break; } @@ -1738,6 +1741,7 @@ int reiserfs_cut_from_item(struct reiser "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); #endif + DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes); DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); return n_ret_value; } @@ -1979,6 +1983,11 @@ int reiserfs_paste_into_item(struct reis pathrelse(p_s_search_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) { + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + pathrelse(p_s_search_path); + return -ENOSPC; + } init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); #ifdef DISPLACE_NEW_PACKING_LOCALITIES @@ -2031,6 +2040,7 @@ int reiserfs_paste_into_item(struct reis n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); #endif + DLIMIT_FREE_SPACE(inode, n_pasted_size); DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); return retval; } @@ -2068,6 +2078,11 @@ int reiserfs_insert_item(struct reiserfs pathrelse(p_s_path); return -EDQUOT; } + if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) { + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + pathrelse(p_s_path); + 
return -ENOSPC; + } } init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); @@ -2115,7 +2130,9 @@ int reiserfs_insert_item(struct reiserfs "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); #endif - if (inode) + if (inode) { + DLIMIT_FREE_SPACE(inode, quota_bytes); DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); + } return retval; } diff -NurpP --minimal linux-2.6.19/fs/reiserfs/super.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/super.c --- linux-2.6.19/fs/reiserfs/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/super.c 2006-11-08 21:52:09 +0100 @@ -137,7 +137,7 @@ static int remove_save_link_only(struct } #ifdef CONFIG_QUOTA -static int reiserfs_quota_on_mount(struct super_block *, int); +static int reiserfs_quota_on_mount(struct dqhash *, int); #endif /* look for uncompleted unlinks and truncates and complete them */ @@ -177,7 +177,7 @@ static int finish_unfinished(struct supe /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { if (REISERFS_SB(s)->s_qf_names[i]) { - int ret = reiserfs_quota_on_mount(s, i); + int ret = reiserfs_quota_on_mount(s->s_dqh, i); if (ret < 0) reiserfs_warning(s, "reiserfs: cannot turn on journalled quota: error %d", @@ -291,8 +291,8 @@ static int finish_unfinished(struct supe #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i]) - vfs_quota_off_mount(s, i); + if (dqh_dqopt(s->s_dqh)->files[i]) + vfs_quota_off_mount(s->s_dqh, i); } if (ms_active_set) /* Restore the flag back */ @@ -587,9 +587,9 @@ static void reiserfs_clear_inode(struct #endif #ifdef CONFIG_QUOTA -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, +static ssize_t reiserfs_quota_write(struct dqhash *, int, const char *, size_t, loff_t); -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, +static ssize_t reiserfs_quota_read(struct 
dqhash *, int, char *, size_t, loff_t); #endif @@ -622,8 +622,8 @@ static int reiserfs_write_dquot(struct d static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); -static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, char *); +static int reiserfs_write_info(struct dqhash *, int); +static int reiserfs_quota_on(struct dqhash *, int, int, char *); static struct dquot_operations reiserfs_quota_operations = { .initialize = reiserfs_dquot_initialize, @@ -885,6 +885,14 @@ static int reiserfs_parse_options(struct {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, #endif +#ifndef CONFIG_TAGGING_NONE + {"tagxid",.setmask = 1 << REISERFS_TAGGED}, + {"tag",.setmask = 1 << REISERFS_TAGGED}, + {"notag",.clrmask = 1 << REISERFS_TAGGED}, +#endif +#ifdef CONFIG_PROPAGATE + {"tag",.arg_required = 'T',.values = NULL}, +#endif #ifdef CONFIG_REISERFS_FS_POSIX_ACL {"acl",.setmask = 1 << REISERFS_POSIXACL}, {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, @@ -981,7 +989,7 @@ static int reiserfs_parse_options(struct if (c == 'u' || c == 'g') { int qtype = c == 'u' ? 
USRQUOTA : GRPQUOTA; - if (sb_any_quota_enabled(s)) { + if (dqh_any_quota_enabled(s->s_dqh)) { reiserfs_warning(s, "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); return 0; @@ -1044,7 +1052,7 @@ static int reiserfs_parse_options(struct } /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ if (!(*mount_options & (1 << REISERFS_QUOTA)) - && sb_any_quota_enabled(s)) { + && dqh_any_quota_enabled(s->s_dqh)) { reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on."); return 0; @@ -1146,6 +1154,12 @@ static int reiserfs_remount(struct super return -EINVAL; } + if ((mount_options & (1 << REISERFS_TAGGED)) && + !(s->s_flags & MS_TAGGED)) { + reiserfs_warning(s, "reiserfs: tagging not permitted on remount."); + return -EINVAL; + } + handle_attrs(s); /* Add options that are safe here */ @@ -1336,7 +1350,7 @@ static int read_super_block(struct super s->s_export_op = &reiserfs_export_ops; #ifdef CONFIG_QUOTA s->s_qcop = &reiserfs_qctl_operations; - s->dq_op = &reiserfs_quota_operations; + s->s_qop = &reiserfs_quota_operations; #endif /* new format is limited by the 32 bit wide i_blocks field, want to @@ -1594,6 +1608,10 @@ static int reiserfs_fill_super(struct su goto error; } + /* map mount option tagxid */ + if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED)) + s->s_flags |= MS_TAGGED; + rs = SB_DISK_SUPER_BLOCK(s); /* Let's do basic sanity check to verify that underlying device is not smaller than the filesystem. 
If the check fails then abort and scream, @@ -1869,16 +1887,16 @@ static int reiserfs_write_dquot(struct d struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_commit(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: @@ -1891,20 +1909,20 @@ static int reiserfs_acquire_dquot(struct struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_acquire(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: - reiserfs_write_unlock(dquot->dq_sb); + reiserfs_write_unlock(dquot->dq_dqh->dqh_sb); return ret; } @@ -1913,37 +1931,38 @@ static int reiserfs_release_dquot(struct struct reiserfs_transaction_handle th; int ret, err; - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock(dquot->dq_dqh->dqh_sb); ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + journal_begin(&th, dquot->dq_dqh->dqh_sb, + REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (ret) goto out; ret = dquot_release(dquot); err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + journal_end(&th, dquot->dq_dqh->dqh_sb, + 
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb)); if (!ret && err) ret = err; out: - reiserfs_write_unlock(dquot->dq_sb); + reiserfs_write_unlock(dquot->dq_dqh->dqh_sb); return ret; } static int reiserfs_mark_dquot_dirty(struct dquot *dquot) { /* Are we journalling quotas? */ - if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] || + REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return reiserfs_write_dquot(dquot); } else return dquot_mark_dquot_dirty(dquot); } -static int reiserfs_write_info(struct super_block *sb, int type) +static int reiserfs_write_info(struct dqhash *hash, int type) { struct reiserfs_transaction_handle th; + struct super_block *sb = hash->dqh_sb; int ret, err; /* Data block + inode block */ @@ -1951,7 +1970,7 @@ static int reiserfs_write_info(struct su ret = journal_begin(&th, sb, 2); if (ret) goto out; - ret = dquot_commit_info(sb, type); + ret = dquot_commit_info(hash, type); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; @@ -1963,18 +1982,21 @@ static int reiserfs_write_info(struct su /* * Turn on quotas during mount time - we need to find the quota file and such... 
*/ -static int reiserfs_quota_on_mount(struct super_block *sb, int type) +static int reiserfs_quota_on_mount(struct dqhash *hash, int type) { - return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + struct super_block *sb = hash->dqh_sb; + + return vfs_quota_on_mount(hash, REISERFS_SB(sb)->s_qf_names[type], REISERFS_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, +static int reiserfs_quota_on(struct dqhash *hash, int type, int format_id, char *path) { + struct super_block *sb = hash->dqh_sb; int err; struct nameidata nd; @@ -1999,7 +2021,7 @@ static int reiserfs_quota_on(struct supe if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Quotafile not of fs root? */ if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) @@ -2007,17 +2029,18 @@ static int reiserfs_quota_on(struct supe "reiserfs: Quota file not on filesystem root. " "Journalled quota will not work."); path_release(&nd); - return vfs_quota_on(sb, type, format_id, path); + return vfs_quota_on(hash, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, +static ssize_t reiserfs_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; size_t toread; @@ -2059,10 +2082,11 @@ static ssize_t reiserfs_quota_read(struc /* Write to quotafile (we know the transaction is already started and has * enough credits) */ -static ssize_t reiserfs_quota_write(struct super_block *sb, int type, +static ssize_t reiserfs_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; unsigned long blk = off >> sb->s_blocksize_bits; int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; diff -NurpP --minimal linux-2.6.19/fs/reiserfs/xattr.c linux-2.6.19-vs2.1.x-t1/fs/reiserfs/xattr.c --- linux-2.6.19/fs/reiserfs/xattr.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/reiserfs/xattr.c 2006-11-08 04:57:52 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -823,7 +824,7 @@ int reiserfs_delete_xattrs(struct inode if (dir->d_inode->i_nlink <= 2) { root = get_xa_root(inode->i_sb); reiserfs_write_lock_xattrs(inode->i_sb); - err = vfs_rmdir(root->d_inode, dir); + err = vfs_rmdir(root->d_inode, dir, NULL); reiserfs_write_unlock_xattrs(inode->i_sb); dput(root); } else { diff -NurpP --minimal linux-2.6.19/fs/stat.c linux-2.6.19-vs2.1.x-t1/fs/stat.c --- 
linux-2.6.19/fs/stat.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/stat.c 2006-11-08 04:57:46 +0100 @@ -27,6 +27,7 @@ void generic_fillattr(struct inode *inod stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; + stat->tag = inode->i_tag; stat->rdev = inode->i_rdev; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; diff -NurpP --minimal linux-2.6.19/fs/super.c linux-2.6.19-vs2.1.x-t1/fs/super.c --- linux-2.6.19/fs/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/super.c 2006-11-30 19:49:06 +0100 @@ -37,6 +37,9 @@ #include #include #include +#include +#include +#include #include @@ -84,15 +87,14 @@ static struct super_block *alloc_super(s s->s_count = S_BIAS; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); - mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; - s->dq_op = sb_dquot_ops; + s->s_qop = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; + /* quick hack to make dqhash id unique, sufficient for now */ + s->s_dqh = new_dqhash(s, (unsigned long)s); } out: return s; @@ -107,6 +109,7 @@ out: static inline void destroy_super(struct super_block *s) { security_sb_free(s); + dqhput(s->s_dqh); kfree(s); } @@ -178,7 +181,7 @@ void deactivate_super(struct super_block if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); - DQUOT_OFF(s); + DQUOT_OFF(s->s_dqh); down_write(&s->s_umount); fs->kill_sb(s); put_filesystem(fs); @@ -229,7 +232,7 @@ static int grab_super(struct super_block void __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); + DQUOT_SYNC(sb->s_dqh); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); @@ -853,6 +856,7 @@ struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, 
const char *name, void *data) { struct vfsmount *mnt; + struct super_block *sb; char *secdata = NULL; int error; @@ -878,7 +882,14 @@ vfs_kern_mount(struct file_system_type * if (error < 0) goto out_free_secdata; - error = security_sb_kern_mount(mnt->mnt_sb, secdata); + sb = mnt->mnt_sb; + error = -EPERM; + if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; @@ -906,9 +917,17 @@ do_kern_mount(const char *fstype, int fl { struct file_system_type *type = get_fs_type(fstype); struct vfsmount *mnt; + if (!type) return ERR_PTR(-ENODEV); + + mnt = ERR_PTR(-EPERM); + if ((type->fs_flags & FS_BINARY_MOUNTDATA) && + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) + goto out_put; + mnt = vfs_kern_mount(type, flags, name, data); +out_put: put_filesystem(type); return mnt; } diff -NurpP --minimal linux-2.6.19/fs/sysfs/mount.c linux-2.6.19-vs2.1.x-t1/fs/sysfs/mount.c --- linux-2.6.19/fs/sysfs/mount.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/sysfs/mount.c 2006-11-08 04:57:53 +0100 @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -38,7 +36,7 @@ static int sysfs_fill_super(struct super sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sb->s_time_gran = 1; sysfs_sb = sb; diff -NurpP --minimal linux-2.6.19/fs/udf/super.c linux-2.6.19-vs2.1.x-t1/fs/udf/super.c --- linux-2.6.19/fs/udf/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/udf/super.c 2006-11-08 04:57:51 +0100 @@ -1576,7 +1576,7 @@ static int udf_fill_super(struct super_b /* Fill in the rest of the superblock */ sb->s_op = &udf_sb_ops; - sb->dq_op = NULL; + sb->s_qop = NULL; sb->s_dirt = 0; 
sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; diff -NurpP --minimal linux-2.6.19/fs/ufs/super.c linux-2.6.19-vs2.1.x-t1/fs/ufs/super.c --- linux-2.6.19/fs/ufs/super.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/ufs/super.c 2006-11-08 04:57:51 +0100 @@ -930,7 +930,7 @@ magic_found: * Read ufs_super_block into internal data structures */ sb->s_op = &ufs_super_ops; - sb->dq_op = NULL; /***/ + sb->s_qop = NULL; /***/ sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno); @@ -1248,8 +1248,8 @@ static void destroy_inodecache(void) } #ifdef CONFIG_QUOTA -static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); -static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); +static ssize_t ufs_quota_read(struct dqhash *, int, char *,size_t, loff_t); +static ssize_t ufs_quota_write(struct dqhash *, int, const char *, size_t, loff_t); #endif static struct super_operations ufs_super_ops = { @@ -1274,10 +1274,11 @@ static struct super_operations ufs_super * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and noone else should touch the files) * we don't have to be afraid of races */ -static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ufs_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); @@ -1313,10 +1314,11 @@ static ssize_t ufs_quota_read(struct sup } /* Write to quotafile */ -static ssize_t ufs_quota_write(struct super_block *sb, int type, +static ssize_t ufs_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off) { - struct inode *inode = sb_dqopt(sb)->files[type]; + struct inode *inode = dqh_dqopt(hash)->files[type]; + struct super_block *sb = hash->dqh_sb; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); diff -NurpP --minimal linux-2.6.19/fs/utimes.c linux-2.6.19-vs2.1.x-t1/fs/utimes.c --- linux-2.6.19/fs/utimes.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/utimes.c 2006-11-08 22:44:42 +0100 @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -32,7 +34,7 @@ asmlinkage long sys_utime(char __user * inode = nd.dentry->d_inode; error = -EROFS; - if (IS_RDONLY(inode)) + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt)) goto dput_and_out; /* Don't worry, the checks are done in inode_change_ok() */ @@ -83,14 +85,13 @@ long do_utimes(int dfd, char __user *fil struct iattr newattrs; error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); - if (error) goto out; - inode = nd.dentry->d_inode; - error = -EROFS; - if (IS_RDONLY(inode)) + error = cow_check_and_break(&nd); + if (error) goto dput_and_out; + inode = nd.dentry->d_inode; /* Don't worry, the checks 
are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; diff -NurpP --minimal linux-2.6.19/fs/xattr.c linux-2.6.19-vs2.1.x-t1/fs/xattr.c --- linux-2.6.19/fs/xattr.c 2006-11-30 21:19:28 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xattr.c 2006-11-08 21:52:09 +0100 @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -195,7 +196,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); */ static long setxattr(struct dentry *d, char __user *name, void __user *value, - size_t size, int flags) + size_t size, int flags, struct vfsmount *mnt) { int error; void *kvalue = NULL; @@ -222,6 +223,9 @@ setxattr(struct dentry *d, char __user * } } + /* read-only mount: fail with -EROFS, but still fall through to kfree(kvalue) */ + error = -EROFS; + if (!MNT_IS_RDONLY(mnt)) - error = vfs_setxattr(d, kname, kvalue, size, flags); + error = vfs_setxattr(d, kname, kvalue, size, flags); kfree(kvalue); return error; @@ -237,7 +241,7 @@ sys_setxattr(char __user *path, char __u error = user_path_walk(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -252,7 +256,7 @@ sys_lsetxattr(char __user *path, char __ error = user_path_walk_link(path, &nd); if (error) return error; - error = setxattr(nd.dentry, name, value, size, flags); + error = setxattr(nd.dentry, name, value, size, flags, nd.mnt); path_release(&nd); return error; } @@ -270,7 +274,7 @@ sys_fsetxattr(int fd, char __user *name, return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode); - error = setxattr(dentry, name, value, size, flags); + error = setxattr(dentry, name, value, size, flags, f->f_vfsmnt); fput(f); return error; } @@ -432,7 +436,7 @@ sys_flistxattr(int fd, char __user *list * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -443,6 +447,9 @@ removexattr(struct dentry *d, char __use if (error < 0) return
error; + if (MNT_IS_RDONLY(mnt)) + return -EROFS; + return vfs_removexattr(d, kname); } @@ -455,7 +462,7 @@ sys_removexattr(char __user *path, char error = user_path_walk(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -469,7 +476,7 @@ sys_lremovexattr(char __user *path, char error = user_path_walk_link(path, &nd); if (error) return error; - error = removexattr(nd.dentry, name); + error = removexattr(nd.dentry, name, nd.mnt); path_release(&nd); return error; } @@ -486,7 +493,7 @@ sys_fremovexattr(int fd, char __user *na return error; dentry = f->f_dentry; audit_inode(NULL, dentry->d_inode); - error = removexattr(dentry, name); + error = removexattr(dentry, name, f->f_vfsmnt); fput(f); return error; } diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_file.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_file.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_file.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_file.c 2006-11-08 04:57:51 +0100 @@ -453,6 +453,7 @@ const struct file_operations xfs_file_op .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read, .splice_write = xfs_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, @@ -476,6 +477,7 @@ const struct file_operations xfs_invis_f .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, .sendfile = xfs_file_sendfile_invis, + .sendpage = generic_file_sendpage, .splice_read = xfs_file_splice_read_invis, .splice_write = xfs_file_splice_write_invis, .unlocked_ioctl = xfs_file_ioctl_invis, diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_ioctl.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_ioctl.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_ioctl.c 2006-11-20 
21:12:32 +0100 @@ -1100,6 +1100,8 @@ xfs_ioc_fsgeometry( #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x04000000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x08000000 /* immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1140,6 +1142,10 @@ xfs_di2lxflags( if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= LINUX_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_iops.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_iops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_iops.c 2006-11-30 18:53:18 +0100 @@ -53,6 +53,7 @@ #include #include #include +#include /* * Get a XFS inode from a given vnode. 
@@ -402,6 +403,7 @@ xfs_vn_lookup( d_add(dentry, NULL); return NULL; } + dx_propagate_tag(nd, vn_to_inode(cvp)); return d_splice_alias(vn_to_inode(cvp), dentry); } @@ -659,6 +661,10 @@ xfs_vn_setattr( int flags = 0; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + if (ia_valid & ATTR_UID) { vattr.va_mask |= XFS_AT_UID; vattr.va_uid = attr->ia_uid; @@ -667,6 +673,10 @@ xfs_vn_setattr( vattr.va_mask |= XFS_AT_GID; vattr.va_gid = attr->ia_gid; } + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) { + vattr.va_mask |= XFS_AT_TAG; + vattr.va_tag = attr->ia_tag; + } if (ia_valid & ATTR_SIZE) { vattr.va_mask |= XFS_AT_SIZE; vattr.va_size = attr->ia_size; @@ -712,6 +722,42 @@ xfs_vn_truncate( } STATIC int +xfs_vn_sync_flags(struct inode *inode) +{ + unsigned int oldflags, newflags; + int flags = 0; + int error; + bhv_vattr_t vattr; + bhv_vnode_t *vp = vn_from_inode(inode); + + memset(&vattr, 0, sizeof vattr); + + vattr.va_mask = XFS_AT_XFLAGS; + error = bhv_vop_getattr(vp, &vattr, 0, NULL); + + if (error) + return error; + oldflags = vattr.va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + + if (IS_IMMUTABLE(inode)) + newflags |= XFS_XFLAG_IMMUTABLE; + if (IS_IUNLINK(inode)) + newflags |= XFS_XFLAG_IUNLINK; + if (IS_BARRIER(inode)) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) { + vattr.va_xflags = newflags; + vattr.va_mask |= XFS_AT_XFLAGS; + error = bhv_vop_setattr(vp, &vattr, flags, NULL); + } + vn_revalidate(vp); + return error; +} + +STATIC int xfs_vn_setxattr( struct dentry *dentry, const char *name, @@ -824,6 +870,7 @@ struct inode_operations xfs_inode_operat .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_dir_inode_operations = { @@ -843,6 +890,7 @@ struct inode_operations xfs_dir_inode_op .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, 
.removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; struct inode_operations xfs_symlink_inode_operations = { @@ -856,4 +904,5 @@ struct inode_operations xfs_symlink_inod .getxattr = xfs_vn_getxattr, .listxattr = xfs_vn_listxattr, .removexattr = xfs_vn_removexattr, + .sync_flags = xfs_vn_sync_flags, }; diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_linux.h --- linux-2.6.19/fs/xfs/linux-2.6/xfs_linux.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_linux.h 2006-11-08 04:57:47 +0100 @@ -139,6 +139,7 @@ BUFFER_FNS(PrivateStart, unwritten); #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) +#define current_fstag(cred,vp) (dx_current_fstag(vn_to_inode(vp)->i_sb)) #define current_test_flags(f) (current->flags & (f)) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_super.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_super.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_super.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_super.c 2006-11-20 21:12:32 +0100 @@ -158,6 +158,7 @@ xfs_revalidate_inode( inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; + inode->i_tag = ip->i_d.di_tag; switch (inode->i_mode & S_IFMT) { case S_IFBLK: @@ -185,6 +186,14 @@ xfs_revalidate_inode( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else @@ -708,6 +717,12 @@ xfs_fs_remount( int error; error = bhv_vfs_parseargs(vfsp, options, args, 
1); + if ((args->flags2 & XFSMNT2_TAGGED) && + !(sb->s_flags & MS_TAGGED)) { + printk("XFS: %s: tagging not permitted on remount.\n", + sb->s_id); + error = EINVAL; + } if (!error) error = bhv_vfs_mntupdate(vfsp, flags, args); kmem_free(args, sizeof(*args)); @@ -731,36 +746,40 @@ xfs_fs_show_options( STATIC int xfs_fs_quotasync( - struct super_block *sb, + struct dqhash *hash, int type) { + struct super_block *sb = hash->dqh_sb; return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XQUOTASYNC, 0, NULL); } STATIC int xfs_fs_getxstate( - struct super_block *sb, + struct dqhash *hash, struct fs_quota_stat *fqs) { + struct super_block *sb = hash->dqh_sb; return -bhv_vfs_quotactl(vfs_from_sb(sb), Q_XGETQSTAT, 0, (caddr_t)fqs); } STATIC int xfs_fs_setxstate( - struct super_block *sb, + struct dqhash *hash, unsigned int flags, int op) { + struct super_block *sb = hash->dqh_sb; return -bhv_vfs_quotactl(vfs_from_sb(sb), op, 0, (caddr_t)&flags); } STATIC int xfs_fs_getxquota( - struct super_block *sb, + struct dqhash *hash, int type, qid_t id, struct fs_disk_quota *fdq) { + struct super_block *sb = hash->dqh_sb; return -bhv_vfs_quotactl(vfs_from_sb(sb), (type == USRQUOTA) ? Q_XGETQUOTA : ((type == GRPQUOTA) ? Q_XGETGQUOTA : @@ -769,11 +788,12 @@ xfs_fs_getxquota( STATIC int xfs_fs_setxquota( - struct super_block *sb, + struct dqhash *hash, int type, qid_t id, struct fs_disk_quota *fdq) { + struct super_block *sb = hash->dqh_sb; return -bhv_vfs_quotactl(vfs_from_sb(sb), (type == USRQUOTA) ? Q_XSETQLIM : ((type == GRPQUOTA) ? 
Q_XSETGQLIM : @@ -803,6 +823,9 @@ xfs_fs_fill_super( sb_min_blocksize(sb, BBSIZE); sb->s_export_op = &xfs_export_operations; sb->s_qcop = &xfs_quotactl_operations; +#ifdef CONFIG_QUOTACTL + sb->s_dqh->dqh_qcop = &xfs_quotactl_operations; +#endif sb->s_op = &xfs_super_operations; error = bhv_vfs_mount(vfsp, args, NULL); diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_sysctl.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_sysctl.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_sysctl.c 2006-09-20 16:58:39 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_sysctl.c 2006-11-08 04:57:40 +0100 @@ -57,79 +57,79 @@ xfs_stats_clear_proc_handler( STATIC ctl_table xfs_table[] = { {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.error_level.min, &xfs_params.error_level.max}, {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, 
&xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max}, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR, 
"stats_clear", &xfs_params.stats_clear.val, sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, - &sysctl_intvec, NULL, + NULL, &sysctl_intvec, NULL, &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, #endif /* CONFIG_PROC_FS */ diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_vnode.c --- linux-2.6.19/fs/xfs/linux-2.6/xfs_vnode.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_vnode.c 2006-11-08 04:57:46 +0100 @@ -119,6 +119,7 @@ vn_revalidate_core( inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; + inode->i_tag = vap->va_tag; inode->i_blocks = vap->va_nblocks; inode->i_mtime = vap->va_mtime; inode->i_ctime = vap->va_ctime; @@ -126,6 +127,14 @@ vn_revalidate_core( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else diff -NurpP --minimal linux-2.6.19/fs/xfs/linux-2.6/xfs_vnode.h linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_vnode.h --- linux-2.6.19/fs/xfs/linux-2.6/xfs_vnode.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/linux-2.6/xfs_vnode.h 2006-11-08 04:57:46 +0100 @@ -350,6 +350,7 @@ typedef struct bhv_vattr { xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ + tag_t va_tag; /* owner group id */ xfs_ino_t va_nodeid; /* file id */ xfs_off_t va_size; /* file size in bytes */ u_long va_blocksize; /* blocksize preferred for i/o */ @@ -398,13 +399,15 @@ typedef struct bhv_vattr { #define XFS_AT_PROJID 0x04000000 #define XFS_AT_SIZE_NOPERM 0x08000000 #define XFS_AT_GENCOUNT 0x10000000 +#define XFS_AT_TAG 0x20000000 #define XFS_AT_ALL 
(XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\ + XFS_AT_TAG) #define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ diff -NurpP --minimal linux-2.6.19/fs/xfs/quota/xfs_qm_syscalls.c linux-2.6.19-vs2.1.x-t1/fs/xfs/quota/xfs_qm_syscalls.c --- linux-2.6.19/fs/xfs/quota/xfs_qm_syscalls.c 2006-09-20 16:58:40 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/quota/xfs_qm_syscalls.c 2006-11-30 18:53:18 +0100 @@ -17,6 +17,7 @@ */ #include +#include #include "xfs.h" #include "xfs_fs.h" @@ -213,7 +214,7 @@ xfs_qm_scall_quotaoff( xfs_qoff_logitem_t *qoffstart; int nculprits; - if (!force && !capable(CAP_SYS_ADMIN)) + if (!force && !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); /* * No file system can have quotas enabled on disk but not in core.
@@ -382,7 +383,7 @@ xfs_qm_scall_trunc_qfiles( int error; xfs_inode_t *qip; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); error = 0; if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) { @@ -427,7 +428,7 @@ xfs_qm_scall_quotaon( uint accflags; __int64_t sbflags; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -598,7 +599,7 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if (!capable(CAP_SYS_ADMIN)) + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) return XFS_ERROR(EPERM); if ((newlim->d_fieldmask & diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_clnt.h linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_clnt.h --- linux-2.6.19/fs/xfs/xfs_clnt.h 2006-06-18 04:54:50 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_clnt.h 2006-11-08 04:57:46 +0100 @@ -99,5 +99,7 @@ struct xfs_mount_args { */ #define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred * I/O size in stat(2) */ +#define XFSMNT2_TAGGED 0x80000000 /* context tagging */ + #endif /* __XFS_CLNT_H__ */ diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_dinode.h linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_dinode.h --- linux-2.6.19/fs/xfs/xfs_dinode.h 2006-09-20 16:58:40 +0200 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_dinode.h 2006-11-08 04:57:46 +0100 @@ -53,7 +53,8 @@ typedef struct xfs_dinode_core __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_tag; /* context tagging */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ @@ -257,6 +258,9 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ #define 
XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ +#define XFS_DIFLAG_BARRIER_BIT 14 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 15 /* immutable unlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -271,12 +275,15 @@ typedef enum xfs_dinode_fmt #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) #define XFS_DIFLAG_ANY \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ - XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) + XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_BARRIER | \ + XFS_DIFLAG_IUNLINK) #endif /* __XFS_DINODE_H__ */ diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_fs.h linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_fs.h --- linux-2.6.19/fs/xfs/xfs_fs.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_fs.h 2006-11-08 04:57:46 +0100 @@ -66,6 +66,8 @@ struct fsxattr { #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* @@ -294,7 +296,8 @@ typedef struct xfs_bstat { __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* 
generation count */ __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ + __u16 bs_tag; /* context tagging */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_inode.c linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_inode.c --- linux-2.6.19/fs/xfs/xfs_inode.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_inode.c 2006-11-30 20:55:45 +0100 @@ -50,6 +50,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -736,20 +737,35 @@ xfs_xlate_dinode_core( xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; + uint32_t uid = 0, gid = 0; + uint16_t tag = 0; ASSERT(dir); + if (dir < 0) { + tag = mem_core->di_tag; + /* FIXME: supposed to use superblock flag */ + uid = TAGINO_UID(1, mem_core->di_uid, tag); + gid = TAGINO_GID(1, mem_core->di_gid, tag); + tag = TAGINO_TAG(1, tag); + } + INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); - INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); - INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_uid, uid, dir, arch); + INT_XLATE(buf_core->di_gid, gid, dir, arch); + INT_XLATE(buf_core->di_tag, tag, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { + /* FIXME: supposed to use superblock flag */ + mem_core->di_uid = INOTAG_UID(1, uid, gid); + mem_core->di_gid = 
INOTAG_GID(1, uid, gid); + mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag); memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); } else { @@ -797,6 +813,10 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_PREALLOC; if (di_flags & XFS_DIFLAG_IMMUTABLE) flags |= XFS_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_IUNLINK) + flags |= XFS_XFLAG_IUNLINK; + if (di_flags & XFS_DIFLAG_BARRIER) + flags |= XFS_XFLAG_BARRIER; if (di_flags & XFS_DIFLAG_APPEND) flags |= XFS_XFLAG_APPEND; if (di_flags & XFS_DIFLAG_SYNC) @@ -1128,6 +1148,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_tag = current_fstag(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_itable.c linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_itable.c --- linux-2.6.19/fs/xfs/xfs_itable.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_itable.c 2006-11-08 04:57:46 +0100 @@ -89,6 +89,7 @@ xfs_bulkstat_one_iget( buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; + buf->bs_tag = dic->di_tag; buf->bs_size = dic->di_size; vn_atime_to_bstime(vp, &buf->bs_atime); buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; @@ -163,6 +164,7 @@ xfs_bulkstat_one_dinode( buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT); buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT); buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT); + buf->bs_tag = INT_GET(dic->di_tag, ARCH_CONVERT); buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT); buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT); buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT); diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_mount.h linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_mount.h --- linux-2.6.19/fs/xfs/xfs_mount.h 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_mount.h 2006-11-08 04:57:46 +0100 @@ -460,6 +460,7 @@ typedef struct xfs_mount { 
#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock counters */ +#define XFS_MOUNT_TAGGED (1ULL << 31) /* context tagging */ /* * Default minimum read and write sizes. diff -NurpP --minimal linux-2.6.19/fs/xfs/xfs_vfsops.c linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_vfsops.c --- linux-2.6.19/fs/xfs/xfs_vfsops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_vfsops.c 2006-11-08 04:57:47 +0100 @@ -300,6 +300,8 @@ xfs_start_flags( if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + if (ap->flags2 & XFSMNT2_TAGGED) + mp->m_flags |= XFS_MOUNT_TAGGED; /* * no recovery flag requires a read-only mount @@ -394,6 +396,8 @@ xfs_finish_flags( return XFS_ERROR(EINVAL); } + if (ap->flags2 & XFSMNT2_TAGGED) + vfs->vfs_super->s_flags |= MS_TAGGED; return 0; } @@ -1645,6 +1649,9 @@ xfs_vget( * in stat(). */ #define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */ +#define MNTOPT_TAGGED "tag" /* context tagging for inodes */ +#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */ STATIC unsigned long suffix_strtoul(char *s, char **endp, unsigned int base) @@ -1831,6 +1838,19 @@ xfs_parseargs( args->flags |= XFSMNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { args->flags &= ~XFSMNT_ATTR2; +#ifndef CONFIG_TAGGING_NONE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + args->flags2 |= XFSMNT2_TAGGED; + } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) { + args->flags2 &= ~XFSMNT2_TAGGED; + } else if (!strcmp(this_char, MNTOPT_TAGXID)) { + args->flags2 |= XFSMNT2_TAGGED; +#endif +#ifdef CONFIG_PROPAGATE + } else if (!strcmp(this_char, MNTOPT_TAGGED)) { + /* use value */ + args->flags2 |= XFSMNT2_TAGGED; +#endif } else if (!strcmp(this_char, "osyncisdsync")) { /* no-op, this is now the default */ cmn_err(CE_WARN, diff -NurpP --minimal 
linux-2.6.19/fs/xfs/xfs_vnodeops.c linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_vnodeops.c --- linux-2.6.19/fs/xfs/xfs_vnodeops.c 2006-11-30 21:19:29 +0100 +++ linux-2.6.19-vs2.1.x-t1/fs/xfs/xfs_vnodeops.c 2006-11-20 21:12:32 +0100 @@ -160,6 +160,7 @@ xfs_getattr( vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; + vap->va_tag = ip->i_d.di_tag; vap->va_projid = ip->i_d.di_projid; /* @@ -260,6 +261,7 @@ xfs_setattr( uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; + tag_t tag=0, itag=0; int timeflags = 0; bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; @@ -316,6 +318,7 @@ xfs_setattr( (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; + /* FIXME: handle tagging? */ if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; @@ -395,6 +398,8 @@ xfs_setattr( if (mask & (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| XFS_AT_GID|XFS_AT_PROJID)) { + /* FIXME: handle tagging? */ + /* * CAP_FOWNER overrides the following restrictions: * @@ -443,7 +448,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -451,10 +456,12 @@ xfs_setattr( * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + itag = ip->i_d.di_tag; + iprojid = ip->i_d.di_projid; uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; + gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + tag = (mask & XFS_AT_TAG) ? vap->va_tag : itag; projid = (mask & XFS_AT_PROJID) ? 
(xfs_prid_t)vap->va_projid : iprojid; @@ -482,6 +489,7 @@ xfs_setattr( if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + /* FIXME: handle tagging? */ ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -707,7 +715,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -723,6 +731,12 @@ xfs_setattr( * Change the ownerships and register quota modifications * in the transaction. */ + if (itag != tag) { + if (XFS_IS_GQUOTA_ON(mp)) { + /* FIXME: handle tag quota? */ + } + ip->i_d.di_tag = tag; + } if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & XFS_AT_UID); @@ -803,6 +817,10 @@ xfs_setattr( di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) diff -NurpP --minimal linux-2.6.19/include/asm-arm/tlb.h linux-2.6.19-vs2.1.x-t1/include/asm-arm/tlb.h --- linux-2.6.19/include/asm-arm/tlb.h 2006-06-18 04:54:58 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/asm-arm/tlb.h 2006-11-08 04:57:40 +0100 @@ -28,6 +28,7 @@ #else /* !CONFIG_MMU */ #include +#include /* * TLB handling. 
This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.19/include/asm-arm26/tlb.h linux-2.6.19-vs2.1.x-t1/include/asm-arm26/tlb.h --- linux-2.6.19/include/asm-arm26/tlb.h 2006-01-03 17:30:02 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-arm26/tlb.h 2006-11-08 04:57:40 +0100 @@ -3,6 +3,7 @@ #include #include +#include /* * TLB handling. This allows us to remove pages from the page diff -NurpP --minimal linux-2.6.19/include/asm-arm26/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-arm26/unistd.h --- linux-2.6.19/include/asm-arm26/unistd.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-arm26/unistd.h 2006-11-08 04:57:41 +0100 @@ -302,6 +302,8 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) +#define __NR_vserver (__NR_SYSCALL_BASE+313) + /* * The following SWIs are ARM private. FIXME - make appropriate for arm26 */ diff -NurpP --minimal linux-2.6.19/include/asm-generic/tlb.h linux-2.6.19-vs2.1.x-t1/include/asm-generic/tlb.h --- linux-2.6.19/include/asm-generic/tlb.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-generic/tlb.h 2006-11-08 04:57:40 +0100 @@ -14,6 +14,7 @@ #define _ASM_GENERIC__TLB_H #include +#include #include #include diff -NurpP --minimal linux-2.6.19/include/asm-i386/elf.h linux-2.6.19-vs2.1.x-t1/include/asm-i386/elf.h --- linux-2.6.19/include/asm-i386/elf.h 2006-11-30 21:19:31 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-i386/elf.h 2006-11-08 04:57:53 +0100 @@ -75,7 +75,7 @@ typedef struct user_fxsr_struct elf_fpxr the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 
*/ -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#define ELF_ET_DYN_BASE ((TASK_UNMAPPED_BASE) * 2) /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is now struct_user_regs, they are different) */ diff -NurpP --minimal linux-2.6.19/include/asm-ia64/tlb.h linux-2.6.19-vs2.1.x-t1/include/asm-ia64/tlb.h --- linux-2.6.19/include/asm-ia64/tlb.h 2006-09-20 16:58:40 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/asm-ia64/tlb.h 2006-11-08 04:57:40 +0100 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff -NurpP --minimal linux-2.6.19/include/asm-powerpc/systbl.h linux-2.6.19-vs2.1.x-t1/include/asm-powerpc/systbl.h --- linux-2.6.19/include/asm-powerpc/systbl.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-powerpc/systbl.h 2006-11-20 21:12:32 +0100 @@ -260,7 +260,7 @@ COMPAT_SYS_SPU(fstatfs64) SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64) PPC_SYS_SPU(rtas) OLDSYS(debug_setcontext) -SYSCALL(ni_syscall) +SYSX(sys_vserver, sys32_vserver, sys_vserver) COMPAT_SYS(migrate_pages) COMPAT_SYS(mbind) COMPAT_SYS(get_mempolicy) diff -NurpP --minimal linux-2.6.19/include/asm-powerpc/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-powerpc/unistd.h --- linux-2.6.19/include/asm-powerpc/unistd.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-powerpc/unistd.h 2006-11-20 21:12:32 +0100 @@ -275,7 +275,7 @@ #endif #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 #define __NR_migrate_pages 258 #define __NR_mbind 259 #define __NR_get_mempolicy 260 diff -NurpP --minimal linux-2.6.19/include/asm-s390/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-s390/unistd.h --- linux-2.6.19/include/asm-s390/unistd.h 2006-11-30 21:19:33 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-s390/unistd.h 2006-11-08 04:57:41 +0100 @@ -202,7 +202,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define 
__NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_statfs64 265 #define __NR_fstatfs64 266 #define __NR_remap_file_pages 267 diff -NurpP --minimal linux-2.6.19/include/asm-sparc/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-sparc/unistd.h --- linux-2.6.19/include/asm-sparc/unistd.h 2006-11-30 21:19:34 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-sparc/unistd.h 2006-11-08 21:52:09 +0100 @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.19/include/asm-sparc64/tlb.h linux-2.6.19-vs2.1.x-t1/include/asm-sparc64/tlb.h --- linux-2.6.19/include/asm-sparc64/tlb.h 2006-09-20 16:58:43 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/asm-sparc64/tlb.h 2006-11-08 04:57:40 +0100 @@ -2,6 +2,7 @@ #define _SPARC64_TLB_H #include +#include #include #include #include diff -NurpP --minimal linux-2.6.19/include/asm-sparc64/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-sparc64/unistd.h --- linux-2.6.19/include/asm-sparc64/unistd.h 2006-11-30 21:19:35 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-sparc64/unistd.h 2006-11-08 21:52:09 +0100 @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff -NurpP --minimal linux-2.6.19/include/asm-x86_64/unistd.h linux-2.6.19-vs2.1.x-t1/include/asm-x86_64/unistd.h --- linux-2.6.19/include/asm-x86_64/unistd.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/asm-x86_64/unistd.h 2006-11-08 04:57:41 +0100 @@ -532,7 +532,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 235 __SYSCALL(__NR_utimes, 
sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 diff -NurpP --minimal linux-2.6.19/include/linux/Kbuild linux-2.6.19-vs2.1.x-t1/include/linux/Kbuild --- linux-2.6.19/include/linux/Kbuild 2006-11-30 21:19:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/Kbuild 2006-11-08 04:57:49 +0100 @@ -345,3 +345,6 @@ unifdef-y += xfrm.h unifdef-y += zftape.h objhdr-y += version.h + +header-y += vserver/ + diff -NurpP --minimal linux-2.6.19/include/linux/capability.h linux-2.6.19-vs2.1.x-t1/include/linux/capability.h --- linux-2.6.19/include/linux/capability.h 2006-06-18 04:55:15 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/capability.h 2006-11-08 04:57:40 +0100 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ /* Allow setting zone reclaim policy */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -288,6 +289,11 @@ typedef __u32 kernel_cap_t; #define CAP_AUDIT_CONTROL 30 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 31 + #ifdef __KERNEL__ /* * Bounding set diff -NurpP --minimal linux-2.6.19/include/linux/devpts_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/devpts_fs.h --- linux-2.6.19/include/linux/devpts_fs.h 2004-08-14 12:55:59 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/devpts_fs.h 2006-11-08 04:57:53 +0100 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n #endif +#define DEVPTS_SUPER_MAGIC 0x00001cd1 + #endif /* _LINUX_DEVPTS_FS_H */ diff -NurpP --minimal linux-2.6.19/include/linux/ext2_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/ext2_fs.h --- linux-2.6.19/include/linux/ext2_fs.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/ext2_fs.h 2006-11-08 04:57:46 +0100 @@ -188,6 +188,8 @@ struct ext2_group_desc #define 
EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL FS_IUNLINK_FL /* Immutable unlink */ #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ @@ -244,7 +246,7 @@ struct ext2_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_tag; /* Context Tag */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -276,6 +278,7 @@ struct ext2_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_tag osd2.linux2.l_i_tag #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -317,8 +320,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ -#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ -#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt diff -NurpP --minimal linux-2.6.19/include/linux/ext3_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/ext3_fs.h --- linux-2.6.19/include/linux/ext3_fs.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/ext3_fs.h 2006-11-08 04:57:46 +0100 @@ -177,10 +177,20 @@ struct ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync 
behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETTAG FIOC_SETTAGJ +#endif /* * Inode dynamic state flags @@ -296,7 +306,7 @@ struct ext3_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_tag; /* Context Tag */ __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -330,6 +340,7 @@ struct ext3_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_tag osd2.linux2.l_i_tag #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -384,6 +395,7 @@ struct ext3_inode { #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT3_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -812,6 +824,7 @@ struct buffer_head * ext3_bread (handle_ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize); +extern int ext3_sync_flags(struct inode *inode); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode 
(struct inode *, int); diff -NurpP --minimal linux-2.6.19/include/linux/ext3_jbd.h linux-2.6.19-vs2.1.x-t1/include/linux/ext3_jbd.h --- linux-2.6.19/include/linux/ext3_jbd.h 2006-11-30 21:19:37 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/ext3_jbd.h 2006-11-08 04:57:51 +0100 @@ -77,10 +77,10 @@ #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) +#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? \ + (DQUOT_INIT_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? \ + (DQUOT_DEL_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) #else #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0 diff -NurpP --minimal linux-2.6.19/include/linux/fs.h linux-2.6.19-vs2.1.x-t1/include/linux/fs.h --- linux-2.6.19/include/linux/fs.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/fs.h 2006-11-08 19:20:31 +0100 @@ -120,6 +120,8 @@ extern int dir_notify_enable; #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ +#define MS_TAGGED (1<<24) /* use generic inode tagging */ +#define MS_TAGID (1<<25) /* use specific tag for this mount */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -146,6 +148,8 @@ extern int dir_notify_enable; #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_BARRIER 1024 /* Barrier 
for chroot() */ +#define S_IUNLINK 2048 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -162,23 +166,35 @@ extern int dir_notify_enable; */ #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#define IS_RDONLY(inode) __IS_FLG(inode, MS_RDONLY) #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#ifdef CONFIG_VSERVER_COWBL +# define IS_COW(inode) (IS_IUNLINK(inode) && IS_IMMUTABLE(inode)) +# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) +#else +# define IS_COW(inode) (0) +# define IS_COW_LINK(inode) (0) +#endif + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. 
*/ @@ -252,11 +268,17 @@ extern int dir_notify_enable; #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define FS_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#ifdef CONFIG_VSERVER_LEGACY +#define FS_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ +#else #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - +#endif #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 @@ -322,6 +344,7 @@ typedef void (dio_iodone_t)(struct kiocb #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 #define ATTR_FILE 8192 +#define ATTR_TAG 16384 /* * This is the Inode Attributes structure, used for notify_change(). It @@ -337,6 +360,7 @@ struct iattr { umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; + tag_t ia_tag; loff_t ia_size; struct timespec ia_atime; struct timespec ia_mtime; @@ -350,6 +374,9 @@ struct iattr { struct file *ia_file; }; +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * Includes for diskquotas. 
*/ @@ -547,6 +574,7 @@ struct inode { unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + tag_t i_tag; dev_t i_rdev; loff_t i_size; struct timespec i_atime; @@ -565,6 +593,9 @@ struct inode { struct file_lock *i_flock; struct address_space *i_mapping; struct address_space i_data; +#ifdef CONFIG_QUOTACTL + struct dqhash *i_dqh; +#endif #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; #endif @@ -735,6 +766,7 @@ struct file { loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; + xid_t f_xid; struct file_ra_state f_ra; unsigned long f_version; @@ -817,6 +849,7 @@ struct file_lock { unsigned char fl_type; loff_t fl_start; loff_t fl_end; + xid_t fl_xid; struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ @@ -918,7 +951,7 @@ struct super_block { unsigned long long s_maxbytes; /* Max file size */ struct file_system_type *s_type; struct super_operations *s_op; - struct dquot_operations *dq_op; + struct dquot_operations *s_qop; struct quotactl_ops *s_qcop; struct export_operations *s_export_op; unsigned long s_flags; @@ -943,7 +976,7 @@ struct super_block { struct block_device *s_bdev; struct list_head s_instances; - struct quota_info s_dquot; /* Diskquota specific options */ + struct dqhash *s_dqh; /* Diskquota hash */ int s_frozen; wait_queue_head_t s_wait_unfrozen; @@ -1013,12 +1046,12 @@ static inline void unlock_super(struct s */ extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern int vfs_mkdir(struct inode *, struct dentry *, int); -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); -extern int vfs_link(struct dentry *, struct inode *, struct dentry *); -extern int vfs_rmdir(struct inode *, struct dentry *); -extern int vfs_unlink(struct inode *, struct dentry *); +extern int 
vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *); +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *); +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *); +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *); +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* @@ -1158,6 +1191,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*sync_flags) (struct inode *); }; struct seq_file; @@ -1173,6 +1207,7 @@ extern ssize_t vfs_readv(struct file *, unsigned long, loff_t *); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called @@ -1202,8 +1237,8 @@ struct super_operations { int (*show_options)(struct seq_file *, struct vfsmount *); int (*show_stats)(struct seq_file *, struct vfsmount *); #ifdef CONFIG_QUOTA - ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); - ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); + ssize_t (*quota_read)(struct dqhash *, int, char *, size_t, loff_t); + ssize_t (*quota_write)(struct dqhash *, int, const char *, size_t, loff_t); #endif }; @@ -1715,7 +1750,7 @@ extern struct inode *new_inode(struct su extern int __remove_suid(struct dentry *, int); extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); -extern void remove_dquot_ref(struct super_block *, int, struct list_head *); +extern void 
remove_dquot_ref(struct dqhash *, int, struct list_head *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); @@ -1751,6 +1786,7 @@ extern ssize_t generic_file_buffered_wri extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); @@ -1884,6 +1920,7 @@ extern int dcache_dir_open(struct inode extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff -NurpP --minimal linux-2.6.19/include/linux/init_task.h linux-2.6.19-vs2.1.x-t1/include/linux/init_task.h --- linux-2.6.19/include/linux/init_task.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/init_task.h 2006-11-08 04:57:40 +0100 @@ -140,6 +140,10 @@ extern struct group_info init_groups; .pi_lock = SPIN_LOCK_UNLOCKED, \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + .xid = 0, \ + .vx_info = NULL, \ + .nid = 0, \ + .nx_info = NULL, \ } diff -NurpP --minimal linux-2.6.19/include/linux/ipc.h linux-2.6.19-vs2.1.x-t1/include/linux/ipc.h --- linux-2.6.19/include/linux/ipc.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/ipc.h 
2006-11-08 20:20:37 +0100 @@ -63,6 +63,7 @@ struct kern_ipc_perm key_t key; uid_t uid; gid_t gid; + xid_t xid; uid_t cuid; gid_t cgid; mode_t mode; diff -NurpP --minimal linux-2.6.19/include/linux/kernel.h linux-2.6.19-vs2.1.x-t1/include/linux/kernel.h --- linux-2.6.19/include/linux/kernel.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/kernel.h 2006-11-30 20:55:45 +0100 @@ -17,6 +17,7 @@ #include extern const char linux_banner[]; +extern const char vx_linux_banner[]; #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) diff -NurpP --minimal linux-2.6.19/include/linux/loop.h linux-2.6.19-vs2.1.x-t1/include/linux/loop.h --- linux-2.6.19/include/linux/loop.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/loop.h 2006-11-08 04:57:52 +0100 @@ -45,6 +45,7 @@ struct loop_device { struct loop_func_table *lo_encryption; __u32 lo_init[2]; uid_t lo_key_owner; /* Who set the key */ + xid_t lo_xid; int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); diff -NurpP --minimal linux-2.6.19/include/linux/major.h linux-2.6.19-vs2.1.x-t1/include/linux/major.h --- linux-2.6.19/include/linux/major.h 2006-06-18 04:55:19 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/major.h 2006-11-08 04:57:51 +0100 @@ -15,6 +15,7 @@ #define HD_MAJOR IDE0_MAJOR #define PTY_SLAVE_MAJOR 3 #define TTY_MAJOR 4 +#define VROOT_MAJOR 4 #define TTYAUX_MAJOR 5 #define LP_MAJOR 6 #define VCS_MAJOR 7 diff -NurpP --minimal linux-2.6.19/include/linux/mount.h linux-2.6.19-vs2.1.x-t1/include/linux/mount.h --- linux-2.6.19/include/linux/mount.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/mount.h 2006-11-08 04:57:52 +0100 @@ -27,12 +27,16 @@ struct namespace; #define MNT_NOEXEC 0x04 #define MNT_NOATIME 0x08 #define MNT_NODIRATIME 0x10 +#define MNT_RDONLY 0x20 + +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY)) #define MNT_SHRINKABLE 0x100 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define 
MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ #define MNT_PNODE_MASK 0x3000 /* propogation flag mask */ +#define MNT_TAGID 0x8000 struct vfsmount { struct list_head mnt_hash; @@ -54,6 +58,7 @@ struct vfsmount { struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ struct namespace *mnt_namespace; /* containing namespace */ int mnt_pinned; + tag_t mnt_tag; /* tagging used for vfsmount */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) diff -NurpP --minimal linux-2.6.19/include/linux/net.h linux-2.6.19-vs2.1.x-t1/include/linux/net.h --- linux-2.6.19/include/linux/net.h 2006-11-30 21:19:38 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/net.h 2006-11-08 04:57:42 +0100 @@ -63,6 +63,7 @@ typedef enum { #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 +#define SOCK_USER_SOCKET 5 #ifndef ARCH_HAS_SOCKET_TYPES /** diff -NurpP --minimal linux-2.6.19/include/linux/nfs_mount.h linux-2.6.19-vs2.1.x-t1/include/linux/nfs_mount.h --- linux-2.6.19/include/linux/nfs_mount.h 2005-08-29 22:25:42 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/nfs_mount.h 2006-11-08 04:57:47 +0100 @@ -61,6 +61,7 @@ struct nfs_mount_data { #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_TAGGED 0x8000 /* context tagging */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif diff -NurpP --minimal linux-2.6.19/include/linux/nsproxy.h linux-2.6.19-vs2.1.x-t1/include/linux/nsproxy.h --- linux-2.6.19/include/linux/nsproxy.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/nsproxy.h 2006-11-30 20:55:45 +0100 @@ -51,4 +51,10 @@ static inline void exit_task_namespaces( put_nsproxy(ns); } } + +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #endif diff -NurpP --minimal linux-2.6.19/include/linux/percpu.h linux-2.6.19-vs2.1.x-t1/include/linux/percpu.h --- 
linux-2.6.19/include/linux/percpu.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/percpu.h 2006-11-08 04:57:40 +0100 @@ -11,7 +11,7 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#define PERCPU_ENOUGH_ROOM 65536 #endif /* diff -NurpP --minimal linux-2.6.19/include/linux/pid.h linux-2.6.19-vs2.1.x-t1/include/linux/pid.h --- linux-2.6.19/include/linux/pid.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/pid.h 2006-11-08 04:57:52 +0100 @@ -8,7 +8,8 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + PIDTYPE_REALPID }; /* diff -NurpP --minimal linux-2.6.19/include/linux/proc_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/proc_fs.h --- linux-2.6.19/include/linux/proc_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/proc_fs.h 2006-11-14 08:50:13 +0100 @@ -54,6 +54,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; loff_t size; struct inode_operations * proc_iops; const struct file_operations * proc_fops; @@ -247,10 +248,13 @@ extern void kclist_add(struct kcore_list union proc_op { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vxi_read)(struct vx_info *vxi, char *page); + int (*proc_nxi_read)(struct nx_info *nxi, char *page); }; struct proc_inode { struct pid *pid; + int vx_flags; int fd; union proc_op op; struct proc_dir_entry *pde; diff -NurpP --minimal linux-2.6.19/include/linux/quota.h linux-2.6.19-vs2.1.x-t1/include/linux/quota.h --- linux-2.6.19/include/linux/quota.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/quota.h 2006-11-08 04:57:51 +0100 @@ -55,6 +55,13 @@ extern spinlock_t dq_data_lock; #define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10)) #define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS) +/* are NULL dqhash ptrs 
valid? */ +#ifdef HANDLE_DQHASH_NULL +#define dqhash_valid(hash) ((hash) != NULL) +#else +#define dqhash_valid(hash) (0 == 0) +#endif + #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */ @@ -176,19 +183,20 @@ struct mem_dqinfo { } u; }; -struct super_block; +struct dqhash; #define DQF_MASK 0xffff /* Mask for format specific flags */ #define DQF_INFO_DIRTY_B 16 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ -extern void mark_info_dirty(struct super_block *sb, int type); +extern void mark_info_dirty(struct dqhash *hash, int type); + #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) #define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list)) #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) -#define sb_dqopt(sb) (&(sb)->s_dquot) -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) +#define dqh_dqopt(hash) (&(hash)->dqh_dqopt) +#define dqh_dqinfo(hash, type) (dqh_dqopt(hash)->info+(type)) struct dqstats { int lookups; @@ -218,7 +226,7 @@ struct dquot { struct mutex dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ - struct super_block *dq_sb; /* superblock this applies to */ + struct dqhash *dq_dqh; /* quota hash backpointer */ unsigned int dq_id; /* ID this applies to (uid, gid) */ loff_t dq_off; /* Offset of dquot on disk */ unsigned long dq_flags; /* See DQ_* */ @@ -233,13 +241,14 @@ struct dquot { /* Operations which must be implemented by each quota format */ struct quota_format_ops { - int (*check_quota_file)(struct super_block *sb, int type); /* Detect whether file is in our format */ - int (*read_file_info)(struct super_block *sb, int type); /* Read main info about file - called on quotaon() */ - int (*write_file_info)(struct super_block *sb, int type); /* Write main info about file */ - int 
(*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ - int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ - int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ - int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ + int (*check_quota_file)(struct dqhash *, int); /* Detect whether file is in our format */ + int (*read_file_info)(struct dqhash *, int); /* Read main info about file - called on quotaon() */ + int (*write_file_info)(struct dqhash *, int); /* Write main info about file */ + int (*free_file_info)(struct dqhash *, int); /* Called on quotaoff() */ + + int (*read_dqblk)(struct dquot *); /* Read structure for one user */ + int (*commit_dqblk)(struct dquot *); /* Write structure for one user */ + int (*release_dqblk)(struct dquot *); /* Called when last reference to dquot is being dropped */ }; /* Operations working with dquots */ @@ -255,22 +264,22 @@ struct dquot_operations { int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ - int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ + int (*write_info) (struct dqhash *, int); /* Write of quota "superblock" */ }; /* Operations handling requests from userspace */ struct quotactl_ops { - int (*quota_on)(struct super_block *, int, int, char *); - int (*quota_off)(struct super_block *, int); - int (*quota_sync)(struct super_block *, int); - int (*get_info)(struct super_block *, int, struct if_dqinfo *); - int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); - int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); - int (*get_xstate)(struct super_block *, struct fs_quota_stat *); - int 
(*set_xstate)(struct super_block *, unsigned int, int); - int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); - int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*quota_on)(struct dqhash *, int, int, char *); + int (*quota_off)(struct dqhash *, int); + int (*quota_sync)(struct dqhash *, int); + int (*get_info)(struct dqhash *, int, struct if_dqinfo *); + int (*set_info)(struct dqhash *, int, struct if_dqinfo *); + int (*get_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *); + int (*set_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *); + int (*get_xstate)(struct dqhash *, struct fs_quota_stat *); + int (*set_xstate)(struct dqhash *, unsigned int, int); + int (*get_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *); + int (*set_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *); }; struct quota_format_type { @@ -293,16 +302,15 @@ struct quota_info { struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -/* Inline would be better but we need to dereference super_block which is not defined yet */ -int mark_dquot_dirty(struct dquot *dquot); #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ - (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) +#define dqh_has_quota_enabled(hash, type) (dqhash_valid(hash) && ((type)==USRQUOTA ? 
\ + (dqh_dqopt(hash)->flags & DQUOT_USR_ENABLED) : (dqh_dqopt(hash)->flags & DQUOT_GRP_ENABLED))) + +#define dqh_any_quota_enabled(hash) (dqhash_valid(hash) && \ + (dqh_has_quota_enabled(hash, USRQUOTA) || dqh_has_quota_enabled(hash, GRPQUOTA))) -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ - sb_has_quota_enabled(sb, GRPQUOTA)) int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); @@ -317,6 +325,50 @@ struct quota_module_name { {QFMT_VFS_V0, "quota_v2"},\ {0, NULL}} +struct dqhash { + struct list_head dqh_list; /* List of all quota hashes */ + unsigned int dqh_id; /* ID for hash */ + atomic_t dqh_count; /* Use count */ + struct quota_info dqh_dqopt; /* Diskquota specific options */ + struct dquot_operations *dqh_qop; + struct quotactl_ops *dqh_qcop; + struct super_block *dqh_sb; /* super block */ + unsigned int dqh_hash_bits; + unsigned int dqh_hash_mask; + struct hlist_head *dqh_hash; +}; + +#ifdef CONFIG_QUOTACTL + +struct dqhash *new_dqhash(struct super_block *, unsigned int); +void destroy_dqhash(struct dqhash *); +struct dqhash *find_dqhash(unsigned int); + +static inline void dqhput(struct dqhash *hash) +{ + if (dqhash_valid(hash)) + if (atomic_dec_and_test(&hash->dqh_count)) + destroy_dqhash(hash); +} + +static inline struct dqhash *dqhget(struct dqhash *hash) +{ + if (dqhash_valid(hash)) + atomic_inc(&hash->dqh_count); + return hash; +} + +#else /* CONFIG_QUOTACTL */ + +#define new_dqhash(sb, dqdom) (0) +#define find_dqhash(dqdom) (0) +#define destroy_dqhash(hash) do { } while(0) + +#define dqhput(hash) do { } while(0) +#define dqhget(hash) (hash) + +#endif /* CONFIG_QUOTACTL */ + #else # /* nodep */ include diff -NurpP --minimal linux-2.6.19/include/linux/quotaops.h linux-2.6.19-vs2.1.x-t1/include/linux/quotaops.h --- linux-2.6.19/include/linux/quotaops.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/quotaops.h 2006-11-08 04:57:51 +0100 @@ 
-19,7 +19,7 @@ /* * declaration of quota_function calls in kernel. */ -extern void sync_dquots(struct super_block *sb, int type); +extern void sync_dquots(struct dqhash *hash, int type); extern int dquot_initialize(struct inode *inode, int type); extern int dquot_drop(struct inode *inode); @@ -34,19 +34,19 @@ extern int dquot_transfer(struct inode * extern int dquot_commit(struct dquot *dquot); extern int dquot_acquire(struct dquot *dquot); extern int dquot_release(struct dquot *dquot); -extern int dquot_commit_info(struct super_block *sb, int type); +extern int dquot_commit_info(struct dqhash *hash, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, +extern int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path); +extern int vfs_quota_on_mount(struct dqhash *hash, char *qf_name, int format_id, int type); -extern int vfs_quota_off(struct super_block *sb, int type); -#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) -extern int vfs_quota_sync(struct super_block *sb, int type); -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +extern int vfs_quota_off(struct dqhash *hash, int type); +#define vfs_quota_off_mount(dqh, type) vfs_quota_off(dqh, type) +extern int vfs_quota_sync(struct dqhash *hash, int type); +extern int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii); +extern int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii); +extern int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di); +extern int 
vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di); /* * Operations supported for diskquotas. @@ -61,9 +61,12 @@ extern struct quotactl_ops vfs_quotactl_ * need a lot of space in journal for dquot structure allocation. */ static __inline__ void DQUOT_INIT(struct inode *inode) { - BUG_ON(!inode->i_sb); - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) - inode->i_sb->dq_op->initialize(inode, -1); + if (!dqhash_valid(inode->i_dqh)) + return; + BUG_ON(!inode->i_dqh); + // printk("DQUOT_INIT(%p,%p,%d)\n", inode, inode->i_dqh, dqh_any_quota_enabled(inode->i_dqh)); + if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) + inode->i_dqh->dqh_qop->initialize(inode, -1); } /* The same as with DQUOT_INIT */ @@ -72,8 +75,8 @@ static __inline__ void DQUOT_DROP(struct /* Here we can get arbitrary inode from clear_inode() so we have * to be careful. OTOH we don't need locking as quota operations * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { + if (!IS_NOQUOTA(inode) && inode->i_dqh && inode->i_dqh->dqh_qop + && inode->i_dqh->dqh_qop->drop) { int cnt; /* Test before calling to rule out calls from proc and such * where we are not allowed to block. 
Note that this is @@ -84,7 +87,7 @@ static __inline__ void DQUOT_DROP(struct if (inode->i_dquot[cnt] != NODQUOT) break; if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); + inode->i_dqh->dqh_qop->drop(inode); } } @@ -92,9 +95,9 @@ static __inline__ void DQUOT_DROP(struct * a transaction (deadlocks possible otherwise) */ static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 1) == NO_QUOTA) return 1; } else @@ -112,9 +115,9 @@ static __inline__ int DQUOT_PREALLOC_SPA static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 0) == NO_QUOTA) return 1; } else @@ -132,9 +135,9 @@ static __inline__ int DQUOT_ALLOC_SPACE( static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (dqh_any_quota_enabled(inode->i_dqh)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->alloc_inode(inode, 1) == NO_QUOTA) return 1; } return 0; @@ -142,8 +145,8 @@ static __inline__ int DQUOT_ALLOC_INODE( static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) - inode->i_sb->dq_op->free_space(inode, nr); + if (dqh_any_quota_enabled(inode->i_dqh)) + inode->i_dqh->dqh_qop->free_space(inode, nr); else inode_sub_bytes(inode, nr); } @@ -156,29 +159,30 @@ static __inline__ void DQUOT_FREE_SPACE( static __inline__ void DQUOT_FREE_INODE(struct inode *inode) { - if 
(sb_any_quota_enabled(inode->i_sb)) - inode->i_sb->dq_op->free_inode(inode, 1); + if (dqh_any_quota_enabled(inode->i_dqh)) + inode->i_dqh->dqh_qop->free_inode(inode, 1); } static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) { - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + if (inode->i_dqh->dqh_qop->transfer(inode, iattr) == NO_QUOTA) return 1; } return 0; } /* The following two functions cannot be called inside a transaction */ -#define DQUOT_SYNC(sb) sync_dquots(sb, -1) +#define DQUOT_SYNC(hash) sync_dquots(hash, -1) -static __inline__ int DQUOT_OFF(struct super_block *sb) +static __inline__ int DQUOT_OFF(struct dqhash *hash) { int ret = -ENOSYS; - if (sb_any_quota_enabled(sb) && sb->s_qcop && sb->s_qcop->quota_off) - ret = sb->s_qcop->quota_off(sb, -1); + if (dqh_any_quota_enabled(hash) && hash->dqh_qcop && + hash->dqh_qcop->quota_off) + ret = hash->dqh_qcop->quota_off(hash, -1); return ret; } @@ -193,8 +197,8 @@ static __inline__ int DQUOT_OFF(struct s #define DQUOT_DROP(inode) do { } while(0) #define DQUOT_ALLOC_INODE(inode) (0) #define DQUOT_FREE_INODE(inode) do { } while(0) -#define DQUOT_SYNC(sb) do { } while(0) -#define DQUOT_OFF(sb) do { } while(0) +#define DQUOT_SYNC(hash) do { } while(0) +#define DQUOT_OFF(hash) do { } while(0) #define DQUOT_TRANSFER(inode, iattr) (0) static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { diff -NurpP --minimal linux-2.6.19/include/linux/reiserfs_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/reiserfs_fs.h --- linux-2.6.19/include/linux/reiserfs_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/reiserfs_fs.h 2006-11-08 04:57:46 +0100 @@ -821,6 +821,10 @@ struct stat_data_v1 { #define REISERFS_COMPR_FL FS_COMPR_FL #define REISERFS_NOTAIL_FL FS_NOTAIL_FL +/* unfortunately reiserfs sdattr 
is only 16 bit */ +#define REISERFS_BARRIER_FL (FS_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL (FS_IUNLINK_FL >> 16) + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ @@ -830,6 +834,14 @@ struct stat_data_v1 { REISERFS_COMPR_FL | \ REISERFS_NOTAIL_FL ) +#ifdef CONFIG_VSERVER_LEGACY +#define REISERFS_FL_USER_VISIBLE (REISERFS_IUNLINK_FL|0x80FF) +#define REISERFS_FL_USER_MODIFIABLE (REISERFS_IUNLINK_FL|0x80FF) +#else +#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFIABLE 0x80FF +#endif + /* Stat Data on disk (reiserfs version of UFS disk inode minus the address blocks) */ struct stat_data { @@ -1901,6 +1913,7 @@ static inline void reiserfs_update_sd(st void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); +int reiserfs_sync_flags(struct inode *inode); /* namei.c */ void set_de_name_and_namelen(struct reiserfs_dir_entry *de); diff -NurpP --minimal linux-2.6.19/include/linux/reiserfs_fs_sb.h linux-2.6.19-vs2.1.x-t1/include/linux/reiserfs_fs_sb.h --- linux-2.6.19/include/linux/reiserfs_fs_sb.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/reiserfs_fs_sb.h 2006-11-08 04:57:46 +0100 @@ -456,6 +456,7 @@ enum reiserfs_mount_options { REISERFS_POSIXACL, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, + REISERFS_TAGGED, /* Actions on error */ REISERFS_ERROR_PANIC, diff -NurpP --minimal linux-2.6.19/include/linux/sched.h linux-2.6.19-vs2.1.x-t1/include/linux/sched.h --- linux-2.6.19/include/linux/sched.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/sched.h 2006-11-30 18:53:18 +0100 @@ -26,6 +26,7 @@ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? 
*/ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ +#define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ /* * Scheduling policies @@ -54,6 +55,7 @@ struct sched_param { #include #include #include +// #include #include #include @@ -92,7 +94,7 @@ struct futex_pi_state; * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. */ -#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) +#define CLONE_KERNEL (CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_KTHREAD) /* * These are the constant used to fake the fixed-point load-average @@ -150,6 +152,7 @@ extern unsigned long weighted_cpuload(co /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 #define TASK_DEAD 128 +#define TASK_ONHOLD 256 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -264,27 +267,30 @@ extern void arch_unmap_area_topdown(stru * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. */ -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) typedef atomic_long_t mm_counter_t; +#define __set_mm_counter(mm, member, value) \ + atomic_long_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) \ + ((unsigned long)atomic_long_read(&(mm)->_##member)) #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ /* * The mm counters are protected by its page_table_lock, * so can be incremented directly. 
*/ -#define set_mm_counter(mm, member, value) (mm)->_##member = (value) -#define get_mm_counter(mm, member) ((mm)->_##member) -#define add_mm_counter(mm, member, value) (mm)->_##member += (value) -#define inc_mm_counter(mm, member) (mm)->_##member++ -#define dec_mm_counter(mm, member) (mm)->_##member-- typedef unsigned long mm_counter_t; +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value) +#define get_mm_counter(mm, member) ((mm)->_##member) #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define set_mm_counter(mm, member, value) \ + vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value))) /* (value): argument may be a compound expression */ +#define add_mm_counter(mm, member, value) \ + vx_ ## member ## pages_add((mm), (value)) +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm)) +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm)) + #define get_mm_rss(mm) \ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) #define update_hiwater_rss(mm) do { \ @@ -343,6 +349,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; + struct vx_info *mm_vx_info; /* Token based thrashing protection.
*/ unsigned long swap_token_time; @@ -532,9 +539,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -925,6 +933,14 @@ struct task_struct { void *security; struct audit_context *audit_context; + +/* vserver context data */ + struct vx_info *vx_info; + struct nx_info *nx_info; + + xid_t xid; + nid_t nid; + seccomp_t seccomp; /* Thread group tracking */ @@ -1221,13 +1237,18 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) + +#define find_task_by_real_pid(nr) \ + find_task_by_pid_type(PIDTYPE_REALPID, nr) +#define find_task_by_pid(nr) \ + find_task_by_pid_type(PIDTYPE_PID, nr) + extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff -NurpP --minimal linux-2.6.19/include/linux/security.h linux-2.6.19-vs2.1.x-t1/include/linux/security.h --- linux-2.6.19/include/linux/security.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/security.h 2006-11-08 04:57:51 +0100 @@ -1172,7 +1172,7 @@ struct security_operations { int (*capable) (struct task_struct * tsk, int cap); int (*acct) (struct file * file); int (*sysctl) (struct ctl_table * table, int op); - int (*quotactl) (int cmds, int type, int id, struct super_block * sb); + int (*quotactl) (int cmds, int type, int id, struct dqhash *); int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); @@ -1456,9 +1456,9 @@ static inline int security_sysctl(struct } static inline int security_quotactl (int cmds, int type, int id, - struct super_block *sb) + struct dqhash *hash) { - return security_ops->quotactl (cmds, type, id, sb); + return security_ops->quotactl (cmds, type, id, hash); } static inline int security_quota_on (struct dentry * dentry) @@ -2201,7 +2201,7 @@ static inline int security_sysctl(struct } static inline int security_quotactl (int cmds, int type, int id, - struct super_block * sb) + struct dqhash * hash) { return 0; } diff -NurpP --minimal linux-2.6.19/include/linux/shmem_fs.h linux-2.6.19-vs2.1.x-t1/include/linux/shmem_fs.h --- linux-2.6.19/include/linux/shmem_fs.h 2006-11-30 21:19:39 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/shmem_fs.h 2006-11-08 04:57:53 +0100 @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long flags; diff -NurpP --minimal linux-2.6.19/include/linux/stat.h linux-2.6.19-vs2.1.x-t1/include/linux/stat.h --- linux-2.6.19/include/linux/stat.h 
2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/stat.h 2006-11-08 04:57:46 +0100 @@ -63,6 +63,7 @@ struct kstat { unsigned int nlink; uid_t uid; gid_t gid; + tag_t tag; dev_t rdev; loff_t size; struct timespec atime; diff -NurpP --minimal linux-2.6.19/include/linux/sunrpc/auth.h linux-2.6.19-vs2.1.x-t1/include/linux/sunrpc/auth.h --- linux-2.6.19/include/linux/sunrpc/auth.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/sunrpc/auth.h 2006-11-08 04:57:47 +0100 @@ -24,6 +24,7 @@ struct auth_cred { uid_t uid; gid_t gid; + tag_t tag; struct group_info *group_info; }; diff -NurpP --minimal linux-2.6.19/include/linux/sunrpc/clnt.h linux-2.6.19-vs2.1.x-t1/include/linux/sunrpc/clnt.h --- linux-2.6.19/include/linux/sunrpc/clnt.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/sunrpc/clnt.h 2006-11-08 04:57:47 +0100 @@ -42,7 +42,8 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tag : 1;/* context tagging */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ diff -NurpP --minimal linux-2.6.19/include/linux/syscalls.h linux-2.6.19-vs2.1.x-t1/include/linux/syscalls.h --- linux-2.6.19/include/linux/syscalls.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/syscalls.h 2006-11-08 04:57:51 +0100 @@ -294,6 +294,8 @@ asmlinkage long sys_symlink(const char _ asmlinkage long sys_unlink(const char __user *pathname); asmlinkage long sys_rename(const char __user *oldname, const char __user *newname); +asmlinkage long sys_copyfile(const char __user *from, const char __user *to, + umode_t mode); asmlinkage long sys_chmod(const char __user *filename, mode_t mode); asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); diff -NurpP --minimal linux-2.6.19/include/linux/sysctl.h linux-2.6.19-vs2.1.x-t1/include/linux/sysctl.h --- linux-2.6.19/include/linux/sysctl.h 
2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/sysctl.h 2006-11-08 21:52:09 +0100 @@ -101,6 +101,7 @@ enum KERN_CAP_BSET=14, /* int: capability bounding set */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ + KERN_VSHELPER=17, /* string: path to vshelper policy agent */ KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ @@ -932,6 +933,9 @@ typedef int ctl_handler (ctl_table *tabl typedef int proc_handler (ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid, + void **datap, size_t *lenp); + extern int proc_dostring(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(ctl_table *, int, struct file *, @@ -1016,6 +1020,7 @@ struct ctl_table mode_t mode; ctl_table *child; proc_handler *proc_handler; /* Callback for text formatting */ + virt_handler *virt_handler; /* Context virtualization */ ctl_handler *strategy; /* Callback function for all r/w */ struct proc_dir_entry *de; /* /proc control block */ void *extra1; diff -NurpP --minimal linux-2.6.19/include/linux/sysfs.h linux-2.6.19-vs2.1.x-t1/include/linux/sysfs.h --- linux-2.6.19/include/linux/sysfs.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/sysfs.h 2006-11-08 04:57:53 +0100 @@ -13,6 +13,8 @@ #include #include +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; diff -NurpP --minimal linux-2.6.19/include/linux/time.h linux-2.6.19-vs2.1.x-t1/include/linux/time.h --- linux-2.6.19/include/linux/time.h 2006-09-20 16:58:44 +0200 +++ linux-2.6.19-vs2.1.x-t1/include/linux/time.h 2006-11-08 04:57:40 +0100 @@ -174,6 +174,9 @@ static inline void timespec_add_ns(struc } a->tv_nsec = ns; } + +#include + #endif /* __KERNEL__ */ #define NFDBITS __NFDBITS diff -NurpP --minimal 
linux-2.6.19/include/linux/types.h linux-2.6.19-vs2.1.x-t1/include/linux/types.h --- linux-2.6.19/include/linux/types.h 2006-11-30 21:19:40 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/types.h 2006-11-08 04:57:40 +0100 @@ -39,6 +39,9 @@ typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; +typedef unsigned int tag_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff -NurpP --minimal linux-2.6.19/include/linux/vroot.h linux-2.6.19-vs2.1.x-t1/include/linux/vroot.h --- linux-2.6.19/include/linux/vroot.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vroot.h 2006-11-08 04:57:51 +0100 @@ -0,0 +1,51 @@ + +/* + * include/linux/vroot.h + * + * written by Herbert Pötzl, 9/11/2002 + * ported to 2.6 by Herbert Pötzl, 30/12/2004 + * + * Copyright (C) 2002-2005 by Herbert Pötzl. + * Redistribution of this file is permitted under the + * GNU General Public License. 
+ */ + +#ifndef _LINUX_VROOT_H +#define _LINUX_VROOT_H + + +#ifdef __KERNEL__ + +/* Possible states of device */ +enum { + Vr_unbound, + Vr_bound, +}; + +struct vroot_device { + int vr_number; + int vr_refcnt; + + struct semaphore vr_ctl_mutex; + struct block_device *vr_device; + int vr_state; +}; + + +typedef struct block_device *(vroot_grb_func)(struct block_device *); + +extern int register_vroot_grb(vroot_grb_func *); +extern int unregister_vroot_grb(vroot_grb_func *); + +#endif /* __KERNEL__ */ + +#define MAX_VROOT_DEFAULT 8 + +/* + * IOCTL commands --- we will commandeer 0x56 ('V') + */ + +#define VROOT_SET_DEV 0x5600 +#define VROOT_CLR_DEV 0x5601 + +#endif /* _LINUX_VROOT_H */ diff -NurpP --minimal linux-2.6.19/include/linux/vs_base.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_base.h --- linux-2.6.19/include/linux/vs_base.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_base.h 2006-11-30 19:39:09 +0100 @@ -0,0 +1,9 @@ +#ifndef _VS_BASE_H +#define _VS_BASE_H + +#include "vserver/base.h" +#include "vserver/debug.h" + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_context.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_context.h --- linux-2.6.19/include/linux/vs_context.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_context.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,244 @@ +#ifndef _VS_CONTEXT_H +#define _VS_CONTEXT_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/history.h" +#include "vserver/debug.h" + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return NULL; + + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_get_vx_info(vxi, _here); + + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +extern 
void free_vx_info(struct vx_info *); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__) + +static inline void __put_vx_info(struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_put_vx_info(vxi, _here); + + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + + +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + if (vxi) { + vxlprintk(VXD_CBIT(xid, 3), + "init_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_init_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + } + *vxp = vxi; +} + + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__,__HERE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + if (!vxi) + return; + + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + __vxh_set_vx_info(vxi, vxp, _here); + + atomic_inc(&vxi->vx_usecnt); + vxo = xchg(vxp, vxi); + BUG_ON(vxo); +} + + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxo; + + vxo = xchg(vxp, NULL); + if (!vxo) + return; + + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + _file, _line); + __vxh_clr_vx_info(vxo, vxp, _here); + + if (atomic_dec_and_test(&vxo->vx_usecnt)) + free_vx_info(vxo); +} + + +#define claim_vx_info(v,p) \ + 
__claim_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __claim_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_claim_vx_info(vxi, task, _here); + + atomic_inc(&vxi->vx_tasks); +} + + +extern void unhash_vx_info(struct vx_info *); + +#define release_vx_info(v,p) \ + __release_vx_info(v,p,__FILE__,__LINE__,__HERE__) + +static inline void __release_vx_info(struct vx_info *vxi, + struct task_struct *task, + const char *_file, int _line, void *_here) +{ + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_tasks):0, + task, _file, _line); + __vxh_release_vx_info(vxi, task, _here); + + might_sleep(); + + if (atomic_dec_and_test(&vxi->vx_tasks)) + unhash_vx_info(vxi); +} + + +#define task_get_vx_info(p) \ + __task_get_vx_info(p,__FILE__,__LINE__,__HERE__) + +static inline struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line, void *_here) +{ + struct vx_info *vxi; + + task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); + vxi = __get_vx_info(p->vx_info, _file, _line, _here); + task_unlock(p); + return vxi; +} + + +static inline void __wakeup_vx_info(struct vx_info *vxi) +{ + if (waitqueue_active(&vxi->vx_wait)) + wake_up_interruptible(&vxi->vx_wait); +} + + +#define enter_vx_info(v,s) __enter_vx_info(v,s,__FILE__,__LINE__) + +static inline void __enter_vx_info(struct vx_info *vxi, + struct vx_info_save *vxis, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]", + vxi, vxi ? 
vxi->vx_id : 0, vxis, current, + current->xid, current->vx_info, _file, _line); + vxis->vxi = xchg(&current->vx_info, vxi); + vxis->xid = current->xid; + current->xid = vxi ? vxi->vx_id : 0; +} + +#define leave_vx_info(s) __leave_vx_info(s,__FILE__,__LINE__) + +static inline void __leave_vx_info(struct vx_info_save *vxis, + const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]", + vxis, vxis->xid, vxis->vxi, current, + current->xid, current->vx_info, _file, _line); + (void)xchg(&current->vx_info, vxis->vxi); + current->xid = vxis->xid; +} + + +static inline void __enter_vx_admin(struct vx_info_save *vxis) +{ + vxis->vxi = xchg(&current->vx_info, NULL); + vxis->xid = xchg(&current->xid, (xid_t)0); +} + +static inline void __leave_vx_admin(struct vx_info_save *vxis) +{ + (void)xchg(&current->xid, vxis->xid); + (void)xchg(&current->vx_info, vxis->vxi); +} + +extern void exit_vx_info(struct task_struct *, int); +extern void exit_vx_info_early(struct task_struct *, int); + + +static inline +struct task_struct *vx_child_reaper(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + struct task_struct *reaper = child_reaper; + + if (!vxi) + goto out; + + BUG_ON(!p->vx_info->vx_reaper); + + /* child reaper for the guest reaper */ + if (vxi->vx_reaper == p) + goto out; + + reaper = vxi->vx_reaper; +out: + vxdprintk(VXD_CBIT(xid, 3), + "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]\n", + p, p->xid, p->pid, reaper, reaper->xid, reaper->pid); + return reaper; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_cowbl.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_cowbl.h --- linux-2.6.19/include/linux/vs_cowbl.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_cowbl.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,44 @@ +#ifndef _VS_COWBL_H +#define _VS_COWBL_H + +#include <linux/fs.h> +#include <linux/dcache.h> +#include <linux/namei.h> + +extern struct dentry *cow_break_link(const char *pathname); + +static inline int cow_check_and_break(struct 
nameidata *nd) +{ + struct inode *inode = nd->dentry->d_inode; + int error = 0; + if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) + return -EROFS; + if (IS_COW(inode)) { + if (IS_COW_LINK(inode)) { + struct dentry *new_dentry, *old_dentry = nd->dentry; + char *path, *buf; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + return -ENOMEM; + } + path = d_path(nd->dentry, nd->mnt, buf, PATH_MAX); + new_dentry = cow_break_link(path); + kfree(buf); + if (!IS_ERR(new_dentry)) { + nd->dentry = new_dentry; + dput(old_dentry); + } else + error = PTR_ERR(new_dentry); + } else { + inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + } + return error; +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_cvirt.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_cvirt.h --- linux-2.6.19/include/linux/vs_cvirt.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_cvirt.h 2006-11-30 19:12:40 +0100 @@ -0,0 +1,49 @@ +#ifndef _VS_CVIRT_H +#define _VS_CVIRT_H + +#include "vserver/cvirt.h" +#include "vserver/context.h" +#include "vserver/base.h" +#include "vserver/debug.h" + + +static inline void vx_activate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_inc(&vxi->cvirt.nr_running); + } +} + +static inline void vx_deactivate_task(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) { + vx_update_load(vxi); + atomic_dec(&vxi->cvirt.nr_running); + } +} + +static inline void vx_uninterruptible_inc(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_inc(&vxi->cvirt.nr_uninterruptible); +} + +static inline void vx_uninterruptible_dec(struct task_struct *p) +{ + struct vx_info *vxi; + + if ((vxi = p->vx_info)) + atomic_dec(&vxi->cvirt.nr_uninterruptible); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal 
linux-2.6.19/include/linux/vs_dlimit.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_dlimit.h --- linux-2.6.19/include/linux/vs_dlimit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_dlimit.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,214 @@ +#ifndef _VS_DLIMIT_H +#define _VS_DLIMIT_H + +#include "vserver/dlimit.h" +#include "vserver/base.h" +#include "vserver/debug.h" + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return; + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", + dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + tag_t tag, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, tag, __dlimit_char(dli), (long long)nr, + ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + tag_t tag, dlsize_t nr, const char *_file, int _line) +{ + struct dl_info *dli = NULL; + 
+ if (nr == 0) + goto out; + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, tag, __dlimit_char(dli), (long long)nr, + _file, _line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + tag_t tag, const char *_file, int _line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; +#if 0 + else + vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d", + sb, tag, + dli->dl_inodes_used, dli->dl_inodes_total, + file, line); +#endif + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, tag, __dlimit_char(dli), ret, _file, _line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + tag_t tag, const char *_file, int _line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, tag); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, tag, __dlimit_char(dli), _file, _line); +} + +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag, + unsigned long *free_blocks, unsigned long *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, tag); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree = (dli->dl_space_total - dli->dl_space_used) + >> 
sb->s_blocksize_bits; + spin_unlock(&dli->dl_lock); + + vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %ld,%ld [mult=%d]", + (long long)bfree, (long long)broot, + free_blocks ? *free_blocks : 0UL, root_blocks ? *root_blocks : 0UL, + dli->dl_nrlmult, _file, _line); /* either pointer may be NULL */ + if (free_blocks) { + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} + +#define DLIMIT_ALLOC_SPACE(in, bytes) \ + __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_SPACE(in, bytes) \ + __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_BLOCK(in, nr) \ + __dl_alloc_space((in)->i_sb, (in)->i_tag, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(in, nr) \ + __dl_free_space((in)->i_sb, (in)->i_tag, \ + ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \ + __FILE__, __LINE__ ) + + +#define DLIMIT_ALLOC_INODE(in) \ + __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(in) \ + __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, tag, fb, rb) \ + __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ ) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_limit.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_limit.h --- linux-2.6.19/include/linux/vs_limit.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_limit.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,140 @@ +#ifndef _VS_LIMIT_H +#define _VS_LIMIT_H + +#include "vserver/limit.h" +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" +#include "vserver/context.h" +#include "vserver/limit_int.h" + + +#define vx_acc_cres(v,d,p,r) \ + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) + +#define vx_acc_cres_cond(x,d,p,r) \ + __vx_acc_cres(((x) == 
vx_current_xid()) ? current->vx_info : 0, \ + r, d, p, __FILE__, __LINE__) + + +#define vx_add_cres(v,a,p,r) \ + __vx_add_cres(v, r, a, p, __FILE__, __LINE__) +#define vx_sub_cres(v,a,p,r) vx_add_cres(v,-(a),p,r) + +#define vx_add_cres_cond(x,a,p,r) \ + __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \ + r, a, p, __FILE__, __LINE__) +#define vx_sub_cres_cond(x,a,p,r) vx_add_cres_cond(x,-(a),p,r) + + +/* process and file limits */ + +#define vx_nproc_inc(p) \ + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) + +#define vx_nproc_dec(p) \ + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) + +#define vx_files_inc(f) \ + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) + +#define vx_files_dec(f) \ + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) + +#define vx_locks_inc(l) \ + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) + +#define vx_locks_dec(l) \ + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) + +#define vx_openfd_inc(f) \ + vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD) + +#define vx_openfd_dec(f) \ + vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD) + + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail(v, r, n, __FILE__, __LINE__) + + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE) + +#define vx_locks_avail(n) \ + vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD) + + +/* dentry limits */ + +#define vx_dentry_inc(d) do { \ + if (atomic_read(&d->d_count) == 1) \ + vx_acc_cres(current->vx_info, 1, d, VLIMIT_DENTRY); \ + } while (0) + +#define vx_dentry_dec(d) do { \ + if (atomic_read(&d->d_count) == 0) \ + vx_acc_cres(current->vx_info,-1, d, VLIMIT_DENTRY); \ + } while (0) + +#define vx_dentry_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_DENTRY) + + +/* socket limits */ + +#define vx_sock_inc(s) \ + 
vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) + +#define vx_sock_dec(s) \ + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK) + + +/* ipc resource limits */ + +#define vx_ipcmsg_add(v,u,a) \ + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_sub(v,u,a) \ + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) + +#define vx_ipcmsg_avail(v,a) \ + vx_cres_avail(v, a, RLIMIT_MSGQUEUE) + + +#define vx_ipcshm_add(v,k,a) \ + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_sub(v,k,a) \ + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) + +#define vx_ipcshm_avail(v,a) \ + vx_cres_avail(v, a, VLIMIT_SHMEM) + + +#define vx_semary_inc(a) \ + vx_acc_cres(current->vx_info, 1, a, VLIMIT_SEMARY) + +#define vx_semary_dec(a) \ + vx_acc_cres(current->vx_info,-1, a, VLIMIT_SEMARY) + + +#define vx_nsems_add(a,n) \ + vx_add_cres(current->vx_info, n, a, VLIMIT_NSEMS) + +#define vx_nsems_sub(a,n) \ + vx_sub_cres(current->vx_info, n, a, VLIMIT_NSEMS) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_memory.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_memory.h --- linux-2.6.19/include/linux/vs_memory.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_memory.h 2006-11-30 19:31:41 +0100 @@ -0,0 +1,159 @@ +#ifndef _VS_MEMORY_H +#define _VS_MEMORY_H + +#include "vserver/limit.h" +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" +#include "vserver/context.h" +#include "vserver/limit_int.h" + + +#define __acc_add_long(a,v) (*(v) += (a)) +#define __acc_inc_long(v) (++*(v)) +#define __acc_dec_long(v) (--*(v)) + +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +#define __acc_add_atomic(a,v) atomic_long_add(a,v) +#define __acc_inc_atomic(v) atomic_long_inc(v) +#define __acc_dec_atomic(v) atomic_long_dec(v) +#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#define __acc_add_atomic(a,v) __acc_add_long(a,v) 
+#define __acc_inc_atomic(v) __acc_inc_long(v) +#define __acc_dec_atomic(v) __acc_dec_long(v) +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + + +#define vx_acc_page(m,d,v,r) do { \ + if ((d) > 0) \ + __acc_inc_long(&(m->v)); \ + else \ + __acc_dec_long(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_page_atomic(m,d,v,r) do { \ + if ((d) > 0) \ + __acc_inc_atomic(&(m->v)); \ + else \ + __acc_dec_atomic(&(m->v)); \ + __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__); \ +} while (0) + + +#define vx_acc_pages(m,p,v,r) do { \ + unsigned long __p = (p); \ + __acc_add_long(__p, &(m->v)); \ + __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + +#define vx_acc_pages_atomic(m,p,v,r) do { \ + unsigned long __p = (p); \ + __acc_add_atomic(__p, &(m->v)); \ + __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__); \ +} while (0) + + + +#define vx_acc_vmpage(m,d) \ + vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) \ + vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _file_rss, VLIMIT_MAPPED) +#define vx_acc_anon_rsspage(m,d) \ + vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON) + +#define vx_acc_vmpages(m,p) \ + vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) \ + vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_file_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _file_rss, VLIMIT_MAPPED) +#define vx_acc_anon_rsspages(m,p) \ + vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON) + +#define vx_pages_add(s,r,p) __vx_add_cres(s, r, p, 0, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define 
vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_file_rsspages_inc(m) vx_acc_file_rsspage(m, 1) +#define vx_file_rsspages_dec(m) vx_acc_file_rsspage(m,-1) +#define vx_file_rsspages_add(m,p) vx_acc_file_rsspages(m, p) +#define vx_file_rsspages_sub(m,p) vx_acc_file_rsspages(m,-(p)) + +#define vx_anon_rsspages_inc(m) vx_acc_anon_rsspage(m, 1) +#define vx_anon_rsspages_dec(m) vx_acc_anon_rsspage(m,-1) +#define vx_anon_rsspages_add(m,p) vx_acc_anon_rsspages(m, p) +#define vx_anon_rsspages_sub(m,p) vx_acc_anon_rsspages(m,-(p)) + + +#define vx_pages_avail(m,p,r) \ + __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__) + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_anon_avail(m,p) vx_pages_avail(m, p, VLIMIT_ANON) +#define vx_mapped_avail(m,p) vx_pages_avail(m, p, VLIMIT_MAPPED) + +#define vx_rss_avail(m,p) \ + __vx_cres_array_avail((m)->mm_vx_info, VLA_RSS, p, __FILE__, __LINE__) + + +enum { + VXPT_UNKNOWN = 0, + VXPT_ANON, + VXPT_NONE, + VXPT_FILE, + VXPT_SWAP, + VXPT_WRITE +}; + +#if 0 +#define vx_page_fault(mm,vma,type,ret) +#else + +static inline +void __vx_page_fault(struct mm_struct *mm, + struct vm_area_struct *vma, int type, int ret) +{ + struct vx_info *vxi = mm->mm_vx_info; + int what; +/* + static char *page_type[6] = + { "UNKNOWN", "ANON","NONE", "FILE", "SWAP", "WRITE" }; + static char *page_what[4] = + { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" }; +*/ + + if (!vxi) + return; + + what = (ret & 0x3); + +/* printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id, + type, what, ret, page_type[type], page_what[what]); +*/ + if (ret & VM_FAULT_WRITE) + what |= 0x4; + atomic_inc(&vxi->cacct.page[type][what]); +} + +#define vx_page_fault(mm,vma,type,ret) __vx_page_fault(mm,vma,type,ret) +#endif + + +extern unsigned long vx_badness(struct 
task_struct *task, struct mm_struct *mm); + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_network.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_network.h --- linux-2.6.19/include/linux/vs_network.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_network.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,183 @@ +#ifndef _NX_VS_NETWORK_H +#define _NX_VS_NETWORK_H + +#include "vserver/context.h" +#include "vserver/network.h" +#include "vserver/base.h" +#include "vserver/debug.h" + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +extern void free_nx_info(struct nx_info *); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__) + +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + if (nxi) { + vxlprintk(VXD_CBIT(nid, 3), + "init_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + } + *nxp = nxi; +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + struct nx_info *nxo; + + if (!nxi) + return; + + vxlprintk(VXD_CBIT(nid, 3), 
"set_nx_info(%p[#%d.%d])", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + + atomic_inc(&nxi->nx_usecnt); + nxo = xchg(nxp, nxi); + BUG_ON(nxo); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo; + + nxo = xchg(nxp, NULL); + if (!nxo) + return; + + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + _file, _line); + + if (atomic_dec_and_test(&nxo->nx_usecnt)) + free_nx_info(nxo); +} + + +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__) + +static inline void __claim_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + atomic_inc(&nxi->nx_tasks); +} + + +extern void unhash_nx_info(struct nx_info *); + +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__) + +static inline void __release_nx_info(struct nx_info *nxi, + struct task_struct *task, const char *_file, int _line) +{ + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_tasks):0, + task, _file, _line); + + might_sleep(); + + if (atomic_dec_and_test(&nxi->nx_tasks)) + unhash_nx_info(nxi); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + + + + +static inline int addr_in_nx_info(struct 
nx_info *nxi, uint32_t addr) +{ + int n,i; + + if (!nxi) + return 1; + + n = nxi->nbipv4; + if (n && (nxi->ipv4[0] == 0)) + return 1; + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == addr) + return 1; + } + return 0; +} + +static inline void exit_nx_info(struct task_struct *p) +{ + if (p->nx_info) + release_nx_info(p->nx_info, p); +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_pid.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_pid.h --- linux-2.6.19/include/linux/vs_pid.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_pid.h 2006-11-30 19:13:14 +0100 @@ -0,0 +1,108 @@ +#ifndef _VS_PID_H +#define _VS_PID_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/debug.h" + + +/* pid faking stuff */ + + +#define vx_info_map_pid(v,p) \ + __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) +#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p) +#define vx_map_tgid(p) vx_map_pid(p) + +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_map_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid && pid == vxi->vx_initpid)?1:pid, + func, file, line); + if (pid == 0) + return 0; + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_info_rmap_pid(v,p) \ + __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__) +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p) +#define vx_rmap_tgid(p) vx_rmap_pid(p) + +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, + const char *func, const char *file, int line) +{ + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { + vxfprintk(VXD_CBIT(cvirt, 2), + "vx_rmap_tgid: %p/%llx: %d -> %d", + vxi, (long long)vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + func, file, line); + if ((pid == 1) && 
vxi->vx_initpid) + return vxi->vx_initpid; + if (pid == vxi->vx_initpid) + return ~0U; + } + return pid; +} + + +#define VXF_FAKE_INIT (VXF_INFO_INIT|VXF_STATE_INIT) + +static inline +int proc_pid_visible(struct task_struct *task, int pid) +{ + if ((pid == 1) && + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) + /* show a blend through init */ + goto visible; + if (vx_check(vx_task_xid(task), VS_WATCH|VS_IDENT)) + goto visible; + return 0; +visible: + return 1; +} + +static inline +struct task_struct *find_proc_task_by_pid(int pid) +{ + struct task_struct *task = find_task_by_pid(pid); + + if (task && !proc_pid_visible(task, pid)) { + vxdprintk(VXD_CBIT(misc, 6), + "dropping task %p[#%u,%u] for %p[#%u,%u]", + task, task->xid, task->pid, + current, current->xid, current->pid); + task = NULL; + } + return task; +} + +static inline +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid) +{ + struct task_struct *task = get_pid_task(pid, PIDTYPE_PID); + + if (task && !proc_pid_visible(task, pid->nr)) { + vxdprintk(VXD_CBIT(misc, 6), + "dropping task %p[#%u,%u] for %p[#%u,%u]", + task, task->xid, task->pid, + current, current->xid, current->pid); + put_task_struct(task); + task = NULL; + } + return task; +} + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_sched.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_sched.h --- linux-2.6.19/include/linux/vs_sched.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_sched.h 2006-11-30 18:53:18 +0100 @@ -0,0 +1,109 @@ +#ifndef _VS_SCHED_H +#define _VS_SCHED_H + +#include "vserver/base.h" +#include "vserver/context.h" +#include "vserver/sched.h" + + +#define VAVAVOOM_RATIO 50 + +#define MAX_PRIO_BIAS 20 +#define MIN_PRIO_BIAS -20 + + +#ifdef CONFIG_VSERVER_HARDCPU + +/* + * effective_prio - return the priority that is based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... 
MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. +19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +static inline +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio) +{ + int vavavoom, max; + + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = sched_pc->tokens) >= 0) { + max = sched_pc->tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + return vavavoom; + } + return 0; +} + + +static inline +int vx_adjust_prio(struct task_struct *p, int prio, int max_user) +{ + struct vx_info *vxi = p->vx_info; + + if (!vxi) + return prio; + + if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) { + struct _vx_sched_pc *sched_pc = &vx_cpu(vxi, sched_pc); + int vavavoom = vx_effective_vavavoom(sched_pc, max_user); + + vxi->sched.vavavoom = vavavoom; + prio += vavavoom; + } + prio += vxi->sched.prio_bias; + return prio; +} + +#else /* !CONFIG_VSERVER_HARDCPU */ + +static inline +int vx_adjust_prio(struct task_struct *p, int prio, int max_user) +{ + struct vx_info *vxi = p->vx_info; + + if (vxi) + prio += vxi->sched.prio_bias; + return prio; +} + +#endif /* CONFIG_VSERVER_HARDCPU */ + + +static inline void vx_account_user(struct vx_info *vxi, + cputime_t cputime, int nice) +{ + if (!vxi) + return; + vx_cpu(vxi, sched_pc).user_ticks += cputime; +} + +static inline void 
vx_account_system(struct vx_info *vxi, + cputime_t cputime, int idle) +{ + if (!vxi) + return; + vx_cpu(vxi, sched_pc).sys_ticks += cputime; +} + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_socket.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_socket.h --- linux-2.6.19/include/linux/vs_socket.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_socket.h 2006-11-30 19:13:42 +0100 @@ -0,0 +1,67 @@ +#ifndef _VS_SOCKET_H +#define _VS_SOCKET_H + +#include "vserver/debug.h" +#include "vserver/base.h" +#include "vserver/cacct.h" +#include "vserver/context.h" + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + switch (family) { + case PF_UNSPEC: + return VXA_SOCK_UNSPEC; + case PF_UNIX: + return VXA_SOCK_UNIX; + case PF_INET: + return VXA_SOCK_INET; + case PF_INET6: + return VXA_SOCK_INET6; + case PF_PACKET: + return VXA_SOCK_PACKET; + default: + return VXA_SOCK_OTHER; + } +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff -NurpP --minimal linux-2.6.19/include/linux/vs_tag.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_tag.h --- 
linux-2.6.19/include/linux/vs_tag.h 1970-01-01 01:00:00 +0100
+++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_tag.h 2006-11-30 18:53:18 +0100
@@ -0,0 +1,44 @@
+#ifndef _VS_TAG_H
+#define _VS_TAG_H
+/* NOTE(review): presumably the header carrying the tag definitions - confirm against the upstream patch */
+#include <linux/vserver/tag.h>
+
+/* check conditions */
+
+#define DX_ADMIN	0x0001
+#define DX_WATCH	0x0002
+#define DX_HOSTID	0x0008
+
+#define DX_IDENT	0x0010
+
+#define DX_ARG_MASK	0x0010	/* currently identical to DX_IDENT */
+
+/* the tag of a task is read from its xid field */
+#define dx_task_tag(t)	((t)->xid)
+
+#define dx_current_tag()	dx_task_tag(current)
+/* dx_check - run __dx_check with the tag of current as cid */
+#define dx_check(c,m)	__dx_check(dx_current_tag(),c,m)
+/* dx_weak_check - like dx_check, but a zero mode always yields 1 */
+#define dx_weak_check(c,m)	((m) ? dx_check(c,m) : 1)
+
+
+/*
+ * check current context for ADMIN/WATCH and optionally against supplied argument:
+ * returns 1 if DX_IDENT and id == cid, DX_ADMIN and cid == 0, DX_WATCH and cid == 1, or DX_HOSTID and id == 0; else 0
+ */
+static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
+{
+	if (mode & DX_ARG_MASK) {
+		if ((mode & DX_IDENT) &&
+			(id == cid))
+			return 1;
+	}
+	return (((mode & DX_ADMIN) && (cid == 0)) ||
+		((mode & DX_WATCH) && (cid == 1)) ||
+		((mode & DX_HOSTID) && (id == 0)));
+}
+
+#else
+#warning duplicate inclusion
+#endif
diff -NurpP --minimal linux-2.6.19/include/linux/vs_time.h linux-2.6.19-vs2.1.x-t1/include/linux/vs_time.h
--- linux-2.6.19/include/linux/vs_time.h 1970-01-01 01:00:00 +0100
+++ linux-2.6.19-vs2.1.x-t1/include/linux/vs_time.h 2006-11-30 18:53:18 +0100
@@ -0,0 +1,19 @@
+#ifndef _VS_TIME_H
+#define _VS_TIME_H
+
+
+/* time faking stuff */
+/* with CONFIG_VSERVER_VTIME the vx_ calls are external functions, otherwise they expand to do_gettimeofday / do_settimeofday */
+#ifdef CONFIG_VSERVER_VTIME
+
+extern void vx_gettimeofday(struct timeval *tv);
+extern int vx_settimeofday(struct timespec *ts);
+
+#else
+#define vx_gettimeofday(t)	do_gettimeofday(t)
+#define vx_settimeofday(t)	do_settimeofday(t)
+#endif
+
+#else
+#warning duplicate inclusion
+#endif
diff -NurpP --minimal linux-2.6.19/include/linux/vserver/Kbuild linux-2.6.19-vs2.1.x-t1/include/linux/vserver/Kbuild
--- linux-2.6.19/include/linux/vserver/Kbuild 1970-01-01 01:00:00 +0100
+++ linux-2.6.19-vs2.1.x-t1/include/linux/vserver/Kbuild 2006-11-08 04:57:49 +0100
@@ -0,0 +1,9 @@
+
+unifdef-y += context_cmd.h network_cmd.h namespace_cmd.h \
+	cacct_cmd.h
cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
+	inode_cmd.h sched_cmd.h signal_cmd.h debug_cmd.h
+
+unifdef-y += switch.h network.h monitor.h
+
+unifdef-y += legacy.h
+
diff -NurpP --minimal linux-2.6.19/include/linux/vserver/base.h linux-2.6.19-vs2.1.x-t1/include/linux/vserver/base.h
--- linux-2.6.19/include/linux/vserver/base.h 1970-01-01 01:00:00 +0100
+++ linux-2.6.19-vs2.1.x-t1/include/linux/vserver/base.h 2006-11-30 19:29:45 +0100
@@ -0,0 +1,237 @@
+#ifndef _VX_BASE_H
+#define _VX_BASE_H
+
+
+/* context state changes */
+
+enum {
+	VSC_STARTUP = 1,
+	VSC_SHUTDOWN,
+
+	VSC_NETUP,
+	VSC_NETDOWN,
+};
+
+
+#define MAX_S_CONTEXT	65535	/* Arbitrary limit */
+/* without CONFIG_VSERVER_DYNAMIC_IDS, MIN_D_CONTEXT lies above MAX_S_CONTEXT */
+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
+#define MIN_D_CONTEXT	49152	/* dynamic contexts start here */
+#else
+#define MIN_D_CONTEXT	65536
+#endif
+
+/* check conditions */
+
+#define VS_ADMIN	0x0001
+#define VS_WATCH	0x0002
+#define VS_HIDE		0x0004
+#define VS_HOSTID	0x0008
+
+#define VS_IDENT	0x0010
+#define VS_EQUIV	0x0020
+#define VS_PARENT	0x0040
+#define VS_CHILD	0x0080
+
+#define VS_ARG_MASK	0x00F0	/* covers VS_IDENT .. VS_CHILD */
+
+#define VS_DYNAMIC	0x0100
+#define VS_STATIC	0x0200
+
+#define VS_ATR_MASK	0x0F00	/* covers VS_DYNAMIC and VS_STATIC */
+/* the _P variants expand to 0 under CONFIG_VSERVER_PRIVACY, else to VS_ADMIN / VS_WATCH */
+#ifdef CONFIG_VSERVER_PRIVACY
+#define VS_ADMIN_P	(0)
+#define VS_WATCH_P	(0)
+#else
+#define VS_ADMIN_P	VS_ADMIN
+#define VS_WATCH_P	VS_WATCH
+#endif
+
+#define VS_HARDIRQ	0x1000
+#define VS_SOFTIRQ	0x2000
+#define VS_IRQ		0x4000
+
+#define VS_IRQ_MASK	0xF000	/* covers VS_HARDIRQ, VS_SOFTIRQ and VS_IRQ */
+/* NOTE(review): presumably needed for the interrupt-context tests selected by VS_IRQ_MASK - confirm against the upstream patch */
+#include <linux/hardirq.h>
+
+/*
+ * check current context for ADMIN/WATCH and
+ * optionally against supplied argument
+ */
+static inline int __vs_check(int cid, int id, unsigned int mode)
+{
+	if (mode & VS_ARG_MAS