--- linux-2.6.14/fs/proc/array.c	2005-10-28 20:49:45 +0200
+++ linux-2.6.14-vs2.0.1-pre3/fs/proc/array.c	2005-10-29 04:15:18 +0200
@@ -135,7 +138,8 @@ static const char *task_state_array[] =
 	"T (stopped)",		/*  4 */
 	"T (tracing stop)",	/*  8 */
 	"Z (zombie)",		/* 16 */
-	"X (dead)"		/* 32 */
+	"X (dead)",		/* 32 */
+	"H (on hold)"		/* 64 */
 };
 
 static inline const char * get_task_state(struct task_struct *tsk)
--- linux-2.6.14/fs/proc/array.c	2005-10-28 20:49:45 +0200
+++ linux-2.6.14-vs2.0.1-pre3/fs/proc/array.c	2005-10-29 04:15:18 +0200
@@ -144,7 +148,8 @@ static inline const char * get_task_stat
 					    TASK_INTERRUPTIBLE |
 					    TASK_UNINTERRUPTIBLE |
 					    TASK_STOPPED |
-					    TASK_TRACED)) |
+					    TASK_TRACED |
+					    TASK_ONHOLD)) |
 			(tsk->exit_state & (EXIT_ZOMBIE |
 					    EXIT_DEAD));
 	const char **p = &task_state_array[0];
--- linux-2.6.14/include/linux/sched.h	2005-10-28 20:49:56 +0200
+++ linux-2.6.14-vs2.0.1-pre3/include/linux/sched.h	2005-10-30 04:29:36 +0100
@@ -127,6 +128,7 @@ extern unsigned long nr_iowait(void);
 #define EXIT_DEAD		32
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
+#define TASK_ONHOLD		128
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
--- linux-2.6.14/include/linux/vserver/sched.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.14-vs2.0.1-pre3/include/linux/vserver/sched.h	2005-10-29 04:15:18 +0200
@@ -0,0 +1,25 @@
+#ifndef _VX_SCHED_H
+#define _VX_SCHED_H
+
+#ifdef __KERNEL__
+
+struct timespec;
+
+void vx_vsi_uptime(struct timespec *, struct timespec *);
+
+
+struct vx_info;
+
+void vx_update_load(struct vx_info *);
+
+
+struct task_struct;
+
+int vx_effective_vavavoom(struct vx_info *, int);
+
+int vx_tokens_recalc(struct vx_info *);
+
+#endif	/* __KERNEL__ */
+#else	/* _VX_SCHED_H */
+#warning duplicate inclusion
+#endif	/* _VX_SCHED_H */
--- linux-2.6.14/kernel/exit.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/exit.c	2005-10-30 04:29:36 +0100
@@ -235,6 +246,7 @@ static inline void reparent_to_init(void
 	ptrace_unlink(current);
 	/* Reparent to init */
 	REMOVE_LINKS(current);
+	/* FIXME handle vchild_reaper/initpid */
 	current->parent = child_reaper;
 	current->real_parent = child_reaper;
 	SET_LINKS(current);
--- linux-2.6.14/kernel/exit.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/exit.c	2005-10-30 04:29:36 +0100
@@ -609,6 +622,7 @@ static inline void forget_original_paren
 	struct task_struct *p, *reaper = father;
 	struct list_head *_p, *_n;
 
+	/* FIXME handle vchild_reaper/initpid */
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -236,6 +239,10 @@ struct runqueue {
 	task_t *migration_thread;
 	struct list_head migration_queue;
 #endif
+#ifdef CONFIG_VSERVER_HARDCPU
+	struct list_head hold_queue;
+	int idle_tokens;
+#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -596,6 +603,7 @@ static inline void sched_info_switch(tas
  */
 static void dequeue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	array->nr_active--;
 	list_del(&p->run_list);
 	if (list_empty(array->queue + p->prio))
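For orientation (not part of the patch): get_task_state() in 2.6.14 picks the /proc status letter by shifting the masked state word, which is why the patch both appends "H (on hold)" to task_state_array[] and adds TASK_ONHOLD to the mask above; the /* 4 */, /* 8 */, ... comments name the state bit each string is meant for. A minimal sketch of that lookup:

/* Sketch only: the bit-to-index walk used by get_task_state() in 2.6.14;
 * entry 0 is TASK_RUNNING (state == 0), each further entry corresponds to
 * the next higher state bit. */
static const char *state_to_string(unsigned long state)
{
	const char **p = &task_state_array[0];

	while (state) {
		p++;
		state >>= 1;
	}
	return *p;
}
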
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -604,6 +612,7 @@ static void dequeue_task(struct task_str
 static void enqueue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	sched_info_queued(p);
 	list_add_tail(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -617,11 +626,13 @@ static void enqueue_task(struct task_str
  */
 static void requeue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	list_move_tail(&p->run_list, array->queue + p->prio);
 }
 
 static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	list_add(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
 	array->nr_active++;
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -645,6 +656,7 @@ static inline void enqueue_task_head(str
 static int effective_prio(task_t *p)
 {
 	int bonus, prio;
+	struct vx_info *vxi;
 
 	if (rt_task(p))
 		return p->prio;
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -652,6 +664,11 @@ static int effective_prio(task_t *p)
 	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
 
 	prio = p->static_prio - bonus;
+
+	if ((vxi = p->vx_info) &&
+		vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
+		prio += vx_effective_vavavoom(vxi, MAX_USER_PRIO);
+
 	if (prio < MAX_RT_PRIO)
 		prio = MAX_RT_PRIO;
 	if (prio > MAX_PRIO-1)
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -785,19 +802,77 @@ static void activate_task(task_t *p, run
 	}
 	p->timestamp = now;
+	vx_activate_task(p);
 	__activate_task(p, rq);
 }
 
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
 	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
 
+static inline
+void deactivate_task(struct task_struct *p, runqueue_t *rq)
+{
+	vx_deactivate_task(p);
+	__deactivate_task(p, rq);
+}
+
+
+#ifdef CONFIG_VSERVER_HARDCPU
+/*
+ * vx_hold_task - put a task on the hold queue
+ */
+static inline
+void vx_hold_task(struct vx_info *vxi,
+	struct task_struct *p, runqueue_t *rq)
+{
+	__deactivate_task(p, rq);
+	p->state |= TASK_ONHOLD;
+	/* a new one on hold */
+	vx_onhold_inc(vxi);
+	list_add_tail(&p->run_list, &rq->hold_queue);
+}
+
+/*
+ * vx_unhold_task - put a task back to the runqueue
+ */
+static inline
+void vx_unhold_task(struct vx_info *vxi,
+	struct task_struct *p, runqueue_t *rq)
+{
+	list_del(&p->run_list);
+	/* one less waiting */
+	vx_onhold_dec(vxi);
+	p->state &= ~TASK_ONHOLD;
+	enqueue_task(p, rq->expired);
+	rq->nr_running++;
+
+	if (p->static_prio < rq->best_expired_prio)
+		rq->best_expired_prio = p->static_prio;
+}
+#else
+static inline
+void vx_hold_task(struct vx_info *vxi,
+	struct task_struct *p, runqueue_t *rq)
+{
+	return;
+}
+
+static inline
+void vx_unhold_task(struct vx_info *vxi,
+	struct task_struct *p, runqueue_t *rq)
+{
+	return;
+}
+#endif /* CONFIG_VSERVER_HARDCPU */
+
+
 /*
  * resched_task - mark a task 'to be rescheduled now'.
 *
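Side note (not part of the patch): vx_hold_task()/vx_unhold_task() are driven by the per-context token bucket implemented in kernel/vserver/sched.c further down. When a hard-scheduled context is out of tokens, the task that was about to run is parked on rq->hold_queue; once vx_tokens_recalc() reports tokens again, the next wakeup or schedule() pass moves it back onto the expired array. Assuming (as the rest of the patch suggests) that roughly one token is consumed per timer tick of execution while fill_rate tokens are granted every interval ticks, the long-run CPU share of a hard-scheduled context is bounded as sketched here; the helper name is hypothetical:

/* Illustration only: upper bound on the CPU share of a hard-scheduled
 * context, e.g. fill_rate = 1, interval = 4 gives roughly 25%. */
static inline int hard_cpu_share_percent(int fill_rate, int interval)
{
	return 100 * fill_rate / interval;
}
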
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -1154,6 +1229,12 @@ static int try_to_wake_up(task_t *p, uns
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
+
+	/* we need to unhold suspended tasks */
+	if (old_state & TASK_ONHOLD) {
+		vx_unhold_task(p->vx_info, p, rq);
+		old_state = p->state;
+	}
 	if (!(old_state & state))
 		goto out;
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -1274,6 +1355,11 @@ out_activate:
 		__activate_task(p, rq);
 	else
 		activate_task(p, rq, cpu == this_cpu);
+
+	/* this is to get the accounting behind the load update */
+	if (old_state & TASK_UNINTERRUPTIBLE)
+		vx_uninterruptible_dec(p);
+
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
 	 * has indicated that it will leave the CPU in short order)
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -1397,6 +1483,7 @@ void fastcall wake_up_new_task(task_t *p
 	p->prio = effective_prio(p);
 
+	vx_activate_task(p);
 	if (likely(cpu == this_cpu)) {
 		if (!(clone_flags & CLONE_VM)) {
 			/*
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -1408,6 +1495,7 @@ void fastcall wake_up_new_task(task_t *p
 			__activate_task(p, rq);
 		else {
 			p->prio = current->prio;
+			BUG_ON(p->state & TASK_ONHOLD);
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2470,13 +2558,16 @@ unsigned long long current_sched_time(co
 void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
 	cputime64_t tmp;
+	int nice = (TASK_NICE(p) > 0);
 
 	p->utime = cputime_add(p->utime, cputime);
+	vx_account_user(vxi, cputime, nice);
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (nice)
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2557,6 +2650,10 @@ void scheduler_tick(void)
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
 			goto out;
+#ifdef CONFIG_VSERVER_HARDCPU_IDLE
+		if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
+			set_need_resched();
+#endif
 		rebalance_tick(cpu, rq, SCHED_IDLE);
 		return;
 	}
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2589,7 +2686,7 @@ void scheduler_tick(void)
 		}
 		goto out_unlock;
 	}
-	if (!--p->time_slice) {
+	if (vx_need_resched(p)) {
 		dequeue_task(p, rq->active);
 		set_tsk_need_resched(p);
 		p->prio = effective_prio(p);
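Note (not part of the patch): vx_need_resched() replaces the plain time-slice decrement in scheduler_tick(); it is defined in the vserver headers, which are not included in this excerpt. At minimum it must keep the original `!--p->time_slice` behaviour, and with CONFIG_VSERVER_HARDCPU it presumably also burns one token of the owning context per tick. A speculative sketch, only to show the intent:

/* Sketch only -- not the patch's actual implementation. */
static inline int vx_need_resched(struct task_struct *p)
{
#ifdef CONFIG_VSERVER_HARDCPU
	struct vx_info *vxi = p->vx_info;

	if (vxi && vx_info_flags(vxi, VXF_SCHED_HARD, 0))
		atomic_dec(&vxi->sched.tokens);	/* consume one token */
#endif
	return !--p->time_slice;
}
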
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2854,6 +2951,10 @@ asmlinkage void __sched schedule(void)
 	unsigned long long now;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
+	struct vx_info *vxi;
+#ifdef CONFIG_VSERVER_HARDCPU
+	int maxidle = -HZ;
+#endif
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2913,12 +3014,41 @@ need_resched_nonpreemptible:
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
 		else {
-			if (prev->state == TASK_UNINTERRUPTIBLE)
+			if (prev->state == TASK_UNINTERRUPTIBLE) {
 				rq->nr_uninterruptible++;
+				vx_uninterruptible_inc(prev);
+			}
 			deactivate_task(prev, rq);
 		}
 	}
 
+#ifdef CONFIG_VSERVER_HARDCPU
+	if (!list_empty(&rq->hold_queue)) {
+		struct list_head *l, *n;
+		int ret;
+
+		vxi = NULL;
+		list_for_each_safe(l, n, &rq->hold_queue) {
+			next = list_entry(l, task_t, run_list);
+			if (vxi == next->vx_info)
+				continue;
+
+			vxi = next->vx_info;
+			ret = vx_tokens_recalc(vxi);
+
+			if (ret > 0) {
+				vx_unhold_task(vxi, next, rq);
+				break;
+			}
+			if ((ret < 0) && (maxidle < ret))
+				maxidle = ret;
+		}
+	}
+	rq->idle_tokens = -maxidle;
+
+pick_next:
+#endif
+
 	cpu = smp_processor_id();
 	if (unlikely(!rq->nr_running)) {
 go_idle:
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -2966,6 +3096,22 @@ go_idle:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
+	vxi = next->vx_info;
+#ifdef CONFIG_VSERVER_HARDCPU
+	if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
+		int ret = vx_tokens_recalc(vxi);
+
+		if (unlikely(ret <= 0)) {
+			if (ret && (rq->idle_tokens > -ret))
+				rq->idle_tokens = -ret;
+			vx_hold_task(vxi, next, rq);
+			goto pick_next;
+		}
+	} else	/* well, looks ugly but not as ugly as the ifdef-ed version */
+#endif
+	if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
+		vx_tokens_recalc(vxi);
+
 	if (!rt_task(next) && next->activated > 0) {
 		unsigned long long delta = now - next->timestamp;
 		if (unlikely((long long)(now - next->timestamp) < 0))
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -3671,6 +3817,7 @@ recheck:
 	oldprio = p->prio;
 	__setscheduler(p, policy, param->sched_priority);
 	if (array) {
+		vx_activate_task(p);
 		__activate_task(p, rq);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -5524,6 +5671,9 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->migration_queue);
 #endif
 		atomic_set(&rq->nr_iowait, 0);
+#ifdef CONFIG_VSERVER_HARDCPU
+		INIT_LIST_HEAD(&rq->hold_queue);
+#endif
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
--- linux-2.6.14/kernel/sched.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/sched.c	2005-10-29 04:19:07 +0200
@@ -5593,6 +5743,7 @@ void normalize_rt_tasks(void)
 		deactivate_task(p, task_rq(p));
 		__setscheduler(p, SCHED_NORMAL, 0);
 		if (array) {
+			vx_activate_task(p);
 			__activate_task(p, task_rq(p));
 			resched_task(rq->curr);
 		}
--- linux-2.6.14/kernel/signal.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/signal.c	2005-10-29 04:15:18 +0200
@@ -646,19 +646,27 @@ static int rm_from_queue(unsigned long m
 static int check_kill_permission(int sig, struct siginfo *info,
 				 struct task_struct *t)
 {
+	int user;
 	int error = -EINVAL;
+
 	if (!valid_signal(sig))
 		return error;
+
+	user = (!info || ((unsigned long)info != 1 &&
+		(unsigned long)info != 2 && SI_FROMUSER(info)));
+
 	error = -EPERM;
-	if ((!info || ((unsigned long)info != 1 &&
-			(unsigned long)info != 2 && SI_FROMUSER(info)))
-	    && ((sig != SIGCONT) ||
+	if (user && ((sig != SIGCONT) ||
 		(current->signal->session != t->signal->session))
 	    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 	    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
 	    && !capable(CAP_KILL))
 		return error;
 
+	error = -ESRCH;
+	if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT))
+		return error;
+
 	error = security_task_kill(t, info, sig);
 	if (!error)
 		audit_signal_info(sig, t); /* Let audit system see the signal */
--- linux-2.6.14/kernel/signal.c	2005-10-28 20:49:57 +0200
+++ linux-2.6.14-vs2.0.1-pre3/kernel/signal.c	2005-10-29 04:15:18 +0200
@@ -1928,6 +1936,11 @@ relock:
 		if (current->pid == 1)
 			continue;
 
+		/* virtual init is protected against user signals */
+		if ((info->si_code == SI_USER) &&
+			vx_current_initpid(current->pid))
+			continue;
+
 		if (sig_kernel_stop(signr)) {
 			/*
 			 * The default action is to stop all threads in
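The new kernel/vserver/sched.c below implements the per-context token bucket used by the hard CPU scheduler. As a quick illustration of the integral refill arithmetic performed in vx_tokens_recalc() below (not part of the patch): with fill_rate = 2 and interval = 10, a lag of 25 jiffies since the last refill yields 25 / 10 = 2 whole intervals, so 2 * 2 = 4 tokens are added, the refill timestamp advances by 2 * 10 = 20 jiffies, and the remaining 5 jiffies carry over to the next recalculation. If no whole interval has elapsed and the bucket is empty, the context is flagged VXS_ONHOLD and the negative return value tells the caller how many jiffies remain until tokens arrive.

/* Illustration only: the integral token refill as done in vx_tokens_recalc(). */
static long tokens_refilled(long delta, long interval, long fill_rate)
{
	return (delta / interval) * fill_rate;	/* e.g. (25 / 10) * 2 == 4 */
}
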
--- linux-2.6.14/kernel/vserver/sched.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.14-vs2.0.1-pre3/kernel/vserver/sched.c	2005-10-30 04:29:36 +0100
@@ -0,0 +1,218 @@
+/*
+ *  linux/kernel/vserver/sched.c
+ *
+ *  Virtual Server: Scheduler Support
+ *
+ *  Copyright (C) 2004-2005  Herbert Pötzl
+ *
+ *  V0.01  adapted Sam Vilains version to 2.6.3
+ *  V0.02  removed legacy interface
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
+ */
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+	long delta, tokens = 0;
+
+	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
+		/* we are paused */
+		return 0;
+
+	delta = jiffies - vxi->sched.jiffies;
+
+	if (delta >= vxi->sched.interval) {
+		/* lockdown scheduler info */
+		spin_lock(&vxi->sched.tokens_lock);
+
+		/* calc integral token part */
+		delta = jiffies - vxi->sched.jiffies;
+		tokens = delta / vxi->sched.interval;
+		delta = tokens * vxi->sched.interval;
+		tokens *= vxi->sched.fill_rate;
+
+		atomic_add(tokens, &vxi->sched.tokens);
+		vxi->sched.jiffies += delta;
+		tokens = atomic_read(&vxi->sched.tokens);
+
+		if (tokens > vxi->sched.tokens_max) {
+			tokens = vxi->sched.tokens_max;
+			atomic_set(&vxi->sched.tokens, tokens);
+		}
+		spin_unlock(&vxi->sched.tokens_lock);
+	} else {
+		/* no new tokens */
+		tokens = vx_tokens_avail(vxi);
+		if (tokens <= 0)
+			vxi->vx_state |= VXS_ONHOLD;
+		if (tokens < vxi->sched.tokens_min) {
+			/* enough tokens will be available in */
+			if (vxi->sched.tokens_min == 0)
+				return delta - vxi->sched.interval;
+			return delta - vxi->sched.interval *
+				vxi->sched.tokens_min / vxi->sched.fill_rate;
+		}
+	}
+
+	/* we have some tokens left */
+	if (vx_info_state(vxi, VXS_ONHOLD) &&
+		(tokens >= vxi->sched.tokens_min))
+		vxi->vx_state &= ~VXS_ONHOLD;
+	if (vx_info_state(vxi, VXS_ONHOLD))
+		tokens -= vxi->sched.tokens_min;
+
+	return tokens;
+}
+
+/*
+ * effective_prio - return the priority that is based on the static
+ * priority but is modified by bonuses/penalties.
+ *
+ * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
+ * into a -4 ... 0 ... +4 bonus/penalty range.
+ *
+ * Additionally, we scale another amount based on the number of
+ * CPU tokens currently held by the context, if the process is
+ * part of a context (and the appropriate SCHED flag is set).
+ * This ranges from -5 ... 0 ... +15, quadratically.
+ *
+ * So, the total bonus is -9 .. 0 .. +19
+ * We use ~50% of the full 0...39 priority range so that:
+ *
+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
+ *    unless that context is far exceeding its CPU allocation.
+ *
+ * Both properties are important to certain workloads.
+ */
+int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
+{
+	int vavavoom, max;
+
+	/* lots of tokens = lots of vavavoom
+	 *   no tokens = no vavavoom      */
+	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
+		max = vxi->sched.tokens_max;
+		vavavoom = max - vavavoom;
+		max = max * max;
+		vavavoom = max_prio * VAVAVOOM_RATIO / 100
+			* (vavavoom*vavavoom - (max >> 2)) / max;
+	} else
+		vavavoom = 0;
+
+	vxi->sched.vavavoom = vavavoom;
+	return vavavoom;
+}
+
+
+int vc_set_sched_v2(uint32_t xid, void __user *data)
+{
+	struct vcmd_set_sched_v2 vc_data;
+	struct vx_info *vxi;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = locate_vx_info(xid);
+	if (!vxi)
+		return -EINVAL;
+
+	spin_lock(&vxi->sched.tokens_lock);
+
+	if (vc_data.interval != SCHED_KEEP)
+		vxi->sched.interval = vc_data.interval;
+	if (vc_data.fill_rate != SCHED_KEEP)
+		vxi->sched.fill_rate = vc_data.fill_rate;
+	if (vc_data.tokens_min != SCHED_KEEP)
+		vxi->sched.tokens_min = vc_data.tokens_min;
+	if (vc_data.tokens_max != SCHED_KEEP)
+		vxi->sched.tokens_max = vc_data.tokens_max;
+	if (vc_data.tokens != SCHED_KEEP)
+		atomic_set(&vxi->sched.tokens, vc_data.tokens);
+
+	/* Sanity check the resultant values */
+	if (vxi->sched.fill_rate <= 0)
+		vxi->sched.fill_rate = 1;
+	if (vxi->sched.interval <= 0)
+		vxi->sched.interval = HZ;
+	if (vxi->sched.tokens_max == 0)
+		vxi->sched.tokens_max = 1;
+	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+		vxi->sched.tokens_min = vxi->sched.tokens_max;
+
+	spin_unlock(&vxi->sched.tokens_lock);
+	put_vx_info(vxi);
+	return 0;
+}
+
+
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+	struct vcmd_set_sched_v3 vc_data;
+	struct vx_info *vxi;
+	unsigned int set_mask;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = locate_vx_info(xid);
+	if (!vxi)
+		return -EINVAL;
+
+	set_mask = vc_data.set_mask;
+
+	spin_lock(&vxi->sched.tokens_lock);
+
+	if (set_mask & VXSM_FILL_RATE)
+		vxi->sched.fill_rate = vc_data.fill_rate;
+	if (set_mask & VXSM_INTERVAL)
+		vxi->sched.interval = vc_data.interval;
+	if (set_mask & VXSM_TOKENS)
+		atomic_set(&vxi->sched.tokens, vc_data.tokens);
+	if (set_mask & VXSM_TOKENS_MIN)
+		vxi->sched.tokens_min = vc_data.tokens_min;
+	if (set_mask & VXSM_TOKENS_MAX)
+		vxi->sched.tokens_max = vc_data.tokens_max;
+	if (set_mask & VXSM_PRIO_BIAS)
+		vxi->sched.priority_bias = vc_data.priority_bias;
+
+	/* Sanity check the resultant values */
+	if (vxi->sched.fill_rate <= 0)
+		vxi->sched.fill_rate = 1;
+	if (vxi->sched.interval <= 0)
+		vxi->sched.interval = HZ;
+	if (vxi->sched.tokens_max == 0)
+		vxi->sched.tokens_max = 1;
+	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+		vxi->sched.tokens_min = vxi->sched.tokens_max;
+	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
+		vxi->sched.priority_bias = MAX_PRIO_BIAS;
+	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
+		vxi->sched.priority_bias = MIN_PRIO_BIAS;
+
+	spin_unlock(&vxi->sched.tokens_lock);
+	put_vx_info(vxi);
+	return 0;
+}
+
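Worked example (illustration only, not part of the patch): the quadratic token-to-priority bias computed by vx_effective_vavavoom() above, assuming VAVAVOOM_RATIO == 50 and max_prio == MAX_USER_PRIO == 40 (the actual constants live in the vserver headers, which are not shown in this excerpt):

/* Same arithmetic as vx_effective_vavavoom(), with the constants inlined. */
static int vavavoom_bias(int tokens, int tokens_max)
{
	int spent = tokens_max - tokens;
	int max2 = tokens_max * tokens_max;

	return 40 * 50 / 100 * (spent * spent - (max2 >> 2)) / max2;
}

/* vavavoom_bias(100, 100) == -5 (boost), vavavoom_bias(50, 100) == 0,
 * vavavoom_bias(0, 100) == +15 (penalty) -- the -5 ... 0 ... +15 range
 * described in the comment above; effective_prio() adds this value, so a
 * full bucket is favoured and an exhausted one penalised. */
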
--- linux-2.6.14/kernel/vserver/signal.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.14-vs2.0.1-pre3/kernel/vserver/signal.c	2005-10-30 04:29:36 +0100
@@ -0,0 +1,119 @@
+/*
+ *  linux/kernel/vserver/signal.c
+ *
+ *  Virtual Server: Signal Support
+ *
+ *  Copyright (C) 2003-2005  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext V0.05
+ *
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+
+int vc_ctx_kill(uint32_t id, void __user *data)
+{
+	int retval, count=0;
+	struct vcmd_ctx_kill_v0 vc_data;
+	struct task_struct *p;
+	struct vx_info *vxi;
+	unsigned long priv = 0;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = locate_vx_info(id);
+	if (!vxi)
+		return -ESRCH;
+
+	retval = -ESRCH;
+	read_lock(&tasklist_lock);
+	switch (vc_data.pid) {
+	case  0:
+		priv = 1;
+	case -1:
+		for_each_process(p) {
+			int err = 0;
+
+			if (vx_task_xid(p) != id || p->pid <= 1 ||
+				(vc_data.pid && vxi->vx_initpid == p->pid))
+				continue;
+
+			err = group_send_sig_info(vc_data.sig, (void*)priv, p);
+			++count;
+			if (err != -EPERM)
+				retval = err;
+		}
+		break;
+
+	case  1:
+		if (vxi->vx_initpid) {
+			vc_data.pid = vxi->vx_initpid;
+			priv = 1;
+		}
+		/* fallthrough */
+	default:
+		p = find_task_by_real_pid(vc_data.pid);
+		if (p) {
+			if ((id == -1) || (vx_task_xid(p) == id))
+				retval = group_send_sig_info(vc_data.sig,
+					(void*)priv, p);
+		}
+		break;
+	}
+	read_unlock(&tasklist_lock);
+	put_vx_info(vxi);
+	return retval;
+}
+
+
+static int __wait_exit(struct vx_info *vxi)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int ret = 0;
+
+	add_wait_queue(&vxi->vx_wait, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+wait:
+	if (vx_info_state(vxi, VXS_SHUTDOWN|VXS_HASHED) == VXS_SHUTDOWN)
+		goto out;
+	if (signal_pending(current)) {
+		ret = -ERESTARTSYS;
+		goto out;
+	}
+	schedule();
+	goto wait;
+
+out:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vxi->vx_wait, &wait);
+	return ret;
+}
+
+
+
+int vc_wait_exit(uint32_t id, void __user *data)
+{
+	struct vx_info *vxi;
+	int ret;
+
+	vxi = locate_vx_info(id);
+	if (!vxi)
+		return -ESRCH;
+
+	ret = __wait_exit(vxi);
+	put_vx_info(vxi);
+	return ret;
+}
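For context (not part of the patch): __wait_exit() above hand-rolls the standard interruptible-wait pattern on the context's vx_wait queue. Expressed with the stock 2.6 helper, which also re-arms the task state on every loop iteration, it would look roughly like this sketch, reusing the vx_wait queue and VXS_* state bits introduced by the patch:

/* Sketch only -- equivalent of __wait_exit() using wait_event_interruptible();
 * it likewise returns 0 once the context has shut down, or -ERESTARTSYS if a
 * signal arrives first. */
static int __wait_exit_sketch(struct vx_info *vxi)
{
	return wait_event_interruptible(vxi->vx_wait,
		vx_info_state(vxi, VXS_SHUTDOWN|VXS_HASHED) == VXS_SHUTDOWN);
}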