--- linux-2.6.16-rc4/fs/proc/array.c	2006-02-18 14:40:26 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/fs/proc/array.c	2006-02-17 23:26:32 +0100
@@ -135,7 +138,9 @@ static const char *task_state_array[] =
 	"T (stopped)",		/*  4 */
 	"T (tracing stop)",	/*  8 */
 	"Z (zombie)",		/* 16 */
-	"X (dead)"		/* 32 */
+	"X (dead)",		/* 32 */
+	"N (noninteractive)",	/* 64 */
+	"H (on hold)"		/* 128 */
 };
 
 static inline const char * get_task_state(struct task_struct *tsk)
--- linux-2.6.16-rc4/fs/proc/array.c	2006-02-18 14:40:26 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/fs/proc/array.c	2006-02-17 23:26:32 +0100
@@ -144,7 +149,8 @@ static inline const char * get_task_stat
 					   TASK_INTERRUPTIBLE |
 					   TASK_UNINTERRUPTIBLE |
 					   TASK_STOPPED |
-					   TASK_TRACED)) |
+					   TASK_TRACED |
+					   TASK_ONHOLD)) |
 			(tsk->exit_state & (EXIT_ZOMBIE |
 					    EXIT_DEAD));
 	const char **p = &task_state_array[0];
--- linux-2.6.16-rc4/include/linux/sched.h	2006-02-18 14:40:35 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/include/linux/sched.h	2006-02-18 15:33:57 +0100
@@ -129,6 +131,7 @@ extern unsigned long nr_iowait(void);
 #define EXIT_DEAD		32
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
+#define TASK_ONHOLD		128
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
--- linux-2.6.16-rc4/include/linux/vserver/sched.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/include/linux/vserver/sched.h	2006-02-17 23:26:33 +0100
@@ -0,0 +1,26 @@
+#ifndef _VX_SCHED_H
+#define _VX_SCHED_H
+
+
+#ifdef __KERNEL__
+
+struct timespec;
+
+void vx_vsi_uptime(struct timespec *, struct timespec *);
+
+
+struct vx_info;
+
+void vx_update_load(struct vx_info *);
+
+
+int vx_tokens_recalc(struct _vx_sched_pc *,
+	unsigned long *, unsigned long *, int [2]);
+
+void vx_update_sched_param(struct _vx_sched *sched,
+	struct _vx_sched_pc *sched_pc);
+
+#endif	/* __KERNEL__ */
+#else	/* _VX_SCHED_H */
+#warning duplicate inclusion
+#endif	/* _VX_SCHED_H */
--- linux-2.6.16-rc4/include/linux/vserver/signal.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/include/linux/vserver/signal.h	2006-02-17 23:26:33 +0100
@@ -0,0 +1,14 @@
+#ifndef _VX_SIGNAL_H
+#define _VX_SIGNAL_H
+
+
+#ifdef __KERNEL__
+
+struct vx_info;
+
+int vx_info_kill(struct vx_info *, int, int);
+
+#endif	/* __KERNEL__ */
+#else	/* _VX_SIGNAL_H */
+#warning duplicate inclusion
+#endif	/* _VX_SIGNAL_H */
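The two new strings line up with the new state bits (TASK_NONINTERACTIVE = 64 already in mainline, TASK_ONHOLD = 128 added above): get_task_state() masks the state word as shown in the second hunk and then walks task_state_array one slot per right shift, so a state value of 2^n selects index n+1. A minimal userspace sketch of that lookup; the array is the patched one (plus the stock first three entries) and the shift walk mirrors the 2.6 helper, so treat it as an illustration rather than the kernel code itself:

#include <stdio.h>

/* mirrors fs/proc/array.c after the patch */
static const char *task_state_array[] = {
	"R (running)",		/*   0 */
	"S (sleeping)",		/*   1 */
	"D (disk sleep)",	/*   2 */
	"T (stopped)",		/*   4 */
	"T (tracing stop)",	/*   8 */
	"Z (zombie)",		/*  16 */
	"X (dead)",		/*  32 */
	"N (noninteractive)",	/*  64 */
	"H (on hold)"		/* 128 */
};

/* same index derivation as get_task_state(): one array slot per bit shift */
static const char *state_name(unsigned int state)
{
	const char **p = &task_state_array[0];

	while (state) {
		p++;
		state >>= 1;
	}
	return *p;
}

int main(void)
{
	printf("%s\n", state_name(128));	/* TASK_ONHOLD -> "H (on hold)" */
	return 0;
}

With TASK_ONHOLD = 128 the loop shifts eight times and lands on the ninth slot, which is why the array has to grow before the bit can ever show up in /proc/*/status.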
--- linux-2.6.16-rc4/kernel/exit.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/exit.c	2006-02-20 23:19:58 +0100
@@ -360,6 +365,9 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
+	exit_namespace(current);
+	current->namespace = init_task.namespace;
+	atomic_inc(&current->namespace->count);
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -245,6 +249,16 @@ struct runqueue {
 	task_t *migration_thread;
 	struct list_head migration_queue;
 #endif
+	unsigned long norm_time;
+	unsigned long idle_time;
+#ifdef CONFIG_VSERVER_IDLETIME
+	int idle_skip;
+#endif
+#ifdef CONFIG_VSERVER_HARDCPU
+	struct list_head hold_queue;
+	unsigned long nr_onhold;
+	int idle_tokens;
+#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -605,6 +619,7 @@ static inline void sched_info_switch(tas
  */
 static void dequeue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	array->nr_active--;
 	list_del(&p->run_list);
 	if (list_empty(array->queue + p->prio))
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -613,6 +628,7 @@ static void dequeue_task(struct task_str
 
 static void enqueue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	sched_info_queued(p);
 	list_add_tail(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -626,6 +642,7 @@ static void enqueue_task(struct task_str
  */
 static void requeue_task(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	list_move_tail(&p->run_list, array->queue + p->prio);
 }
 
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -631,6 +648,7 @@
 
 static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
 {
+	BUG_ON(p->state & TASK_ONHOLD);
 	list_add(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
 	array->nr_active++;
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -661,6 +679,10 @@ static int effective_prio(task_t *p)
 	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
 
 	prio = p->static_prio - bonus;
+
+	/* adjust effective priority */
+	prio += vx_adjust_prio(p, prio, MAX_USER_PRIO);
+
 	if (prio < MAX_RT_PRIO)
 		prio = MAX_RT_PRIO;
 	if (prio > MAX_PRIO-1)
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -799,6 +821,7 @@ static void activate_task(task_t *p, run
 	}
 	p->timestamp = now;
 
+	vx_activate_task(p);
 	__activate_task(p, rq);
 }
 
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -805,7 +828,7 @@
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
 	rq->nr_running--;
 	dequeue_task(p, p->array);
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -812,6 +835,16 @@
 	p->array = NULL;
 }
 
+static inline
+void deactivate_task(struct task_struct *p, runqueue_t *rq)
+{
+	vx_deactivate_task(p);
+	__deactivate_task(p, rq);
+}
+
+
+#include "sched_hard.h"
+
 /*
  * resched_task - mark a task 'to be rescheduled now'.
  *
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -1175,6 +1208,12 @@ static int try_to_wake_up(task_t *p, uns
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
+
+	/* we need to unhold suspended tasks */
+	if (old_state & TASK_ONHOLD) {
+		vx_unhold_task(p, rq);
+		old_state = p->state;
+	}
 	if (!(old_state & state))
 		goto out;
 
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -1291,10 +1330,16 @@ out_activate:
 	 * sleep is handled in a priority-neutral manner, no priority
 	 * boost and no penalty.)
 	 */
-	if (old_state & TASK_NONINTERACTIVE)
+	if (old_state & TASK_NONINTERACTIVE) {
+		vx_activate_task(p);
 		__activate_task(p, rq);
-	else
+	} else
 		activate_task(p, rq, cpu == this_cpu);
+
+	/* this is to get the accounting behind the load update */
+	if (old_state & TASK_UNINTERRUPTIBLE)
+		vx_uninterruptible_dec(p);
+
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
 	 * has indicated that it will leave the CPU in short order)
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -1418,6 +1463,7 @@ void fastcall wake_up_new_task(task_t *p
 
 	p->prio = effective_prio(p);
 
+	vx_activate_task(p);
 	if (likely(cpu == this_cpu)) {
 		if (!(clone_flags & CLONE_VM)) {
 			/*
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -1429,6 +1475,7 @@ void fastcall wake_up_new_task(task_t *p
 			__activate_task(p, rq);
 		else {
 			p->prio = current->prio;
+			BUG_ON(p->state & TASK_ONHOLD);
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2498,7 +2548,7 @@
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (nice)
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2577,6 +2629,7 @@ void scheduler_tick(void)
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
 			goto out;
+		vx_idle_resched(rq);
 		rebalance_tick(cpu, rq, SCHED_IDLE);
 		return;
 	}
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2609,7 +2662,7 @@ void scheduler_tick(void)
 		}
 		goto out_unlock;
 	}
-	if (!--p->time_slice) {
+	if (vx_need_resched(p, --p->time_slice, cpu)) {
 		dequeue_task(p, rq->active);
 		set_tsk_need_resched(p);
 		p->prio = effective_prio(p);
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2933,8 +2986,10 @@ need_resched_nonpreemptible:
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
 		else {
-			if (prev->state == TASK_UNINTERRUPTIBLE)
+			if (prev->state == TASK_UNINTERRUPTIBLE) {
 				rq->nr_uninterruptible++;
+				vx_uninterruptible_inc(prev);
+			}
 			deactivate_task(prev, rq);
 		}
 	}
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2940,8 +2995,17 @@
 	}
 
 	cpu = smp_processor_id();
+	vx_set_rq_time(rq, jiffies);
+try_unhold:
+	vx_try_unhold(rq, cpu);
+pick_next:
+
 	if (unlikely(!rq->nr_running)) {
 go_idle:
+		/* can we skip idle time? */
+		if (vx_try_skip(rq))
+			goto try_unhold;
+
 		idle_balance(cpu, rq);
 		if (!rq->nr_running) {
 			next = rq->idle;
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -2986,6 +3050,10 @@ go_idle:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
+	/* check before we schedule this context */
+	if (!vx_schedule(next, rq, cpu))
+		goto pick_next;
+
 	if (!rt_task(next) && next->activated > 0) {
 		unsigned long long delta = now - next->timestamp;
 		if (unlikely((long long)(now - next->timestamp) < 0))
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -3701,6 +3769,7 @@ recheck:
 	oldprio = p->prio;
 	__setscheduler(p, policy, param->sched_priority);
 	if (array) {
+		vx_activate_task(p);
 		__activate_task(p, rq);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -6041,7 +6110,10 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->migration_queue);
 #endif
 		atomic_set(&rq->nr_iowait, 0);
-
+#ifdef CONFIG_VSERVER_HARDCPU
+		INIT_LIST_HEAD(&rq->hold_queue);
+		rq->nr_onhold = 0;
+#endif
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;
 			for (k = 0; k < MAX_PRIO; k++) {
--- linux-2.6.16-rc4/kernel/sched.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched.c	2006-02-18 15:33:58 +0100
@@ -6110,6 +6182,7 @@ void normalize_rt_tasks(void)
 		deactivate_task(p, task_rq(p));
 		__setscheduler(p, SCHED_NORMAL, 0);
 		if (array) {
+			vx_activate_task(p);
 			__activate_task(p, task_rq(p));
 			resched_task(rq->curr);
 		}
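The scheduler_tick() change above swaps the plain `if (!--p->time_slice)` for vx_need_resched(), which is defined in kernel/sched_hard.h at the end of this patch: while a task of a hard-scheduled context runs, every timer tick also burns one context token, and the task is forced off the CPU as soon as the bucket is down to its last token, even if its time slice has not expired. A small userspace model of that per-tick decision (toy types, not the kernel structures; vx_info lookup and the per-CPU bucket are left out):

#include <stdio.h>

/* toy stand-in for the per-cpu _vx_sched_pc token bucket */
struct toy_sched_pc { int tokens; };

static int toy_need_resched(struct toy_sched_pc *pc, int slice)
{
	int tokens = pc->tokens;

	if (tokens > 0)
		pc->tokens--;		/* one token consumed per tick while running */
	if (tokens < 2)
		return 1;		/* bucket (nearly) empty: reschedule now      */
	return slice == 0;		/* otherwise the stock time-slice rule holds  */
}

int main(void)
{
	struct toy_sched_pc pc = { .tokens = 5 };
	int tick, slice = 100;		/* slice nowhere near expiring */

	for (tick = 1; tick <= 8; tick++)
		if (toy_need_resched(&pc, --slice))
			printf("tick %d: resched (tokens left %d)\n",
			       tick, pc.tokens);
	return 0;
}

With five tokens the task runs four full ticks and is rescheduled on the fifth; from then on it keeps being bounced until vx_tokens_recalc() (below) refills the bucket or the context is put on hold.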
--- linux-2.6.16-rc4/kernel/signal.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/signal.c	2006-02-20 22:02:55 +0100
@@ -676,12 +677,17 @@ static int rm_from_queue(unsigned long m
 static int check_kill_permission(int sig, struct siginfo *info,
 				 struct task_struct *t)
 {
+	int user;
 	int error = -EINVAL;
+
 	if (!valid_signal(sig))
 		return error;
+
+	user = ((info == SEND_SIG_NOINFO) ||
+		(!is_si_special(info) && SI_FROMUSER(info)));
+
 	error = -EPERM;
-	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
-	    && ((sig != SIGCONT) ||
+	if (user && ((sig != SIGCONT) ||
 		(current->signal->session != t->signal->session))
 	    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 	    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
--- linux-2.6.16-rc4/kernel/signal.c	2006-02-18 14:40:37 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/signal.c	2006-02-20 22:02:55 +0100
@@ -688,6 +694,10 @@
 	    && !capable(CAP_KILL))
 		return error;
 
+	error = -ESRCH;
+	if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT))
+		return error;
+
 	error = security_task_kill(t, info, sig);
 	if (!error)
 		audit_signal_info(sig, t); /* Let audit system see the signal */
--- linux-2.6.16-rc4/kernel/vserver/sched.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/vserver/sched.c	2006-02-22 02:41:35 +0100
@@ -0,0 +1,321 @@
+/*
+ *  linux/kernel/vserver/sched.c
+ *
+ *  Virtual Server: Scheduler Support
+ *
+ *  Copyright (C) 2004-2005  Herbert Pötzl
+ *
+ *  V0.01  adapted Sam Vilains version to 2.6.3
+ *  V0.02  removed legacy interface
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define vxd_check_range(val, min, max)
+
+
+void vx_update_sched_param(struct _vx_sched *sched,
+	struct _vx_sched_pc *sched_pc)
+{
+	unsigned int set_mask = sched->update_mask;
+
+	if (set_mask & VXSM_FILL_RATE)
+		sched_pc->fill_rate[0] = sched->fill_rate[0];
+	if (set_mask & VXSM_INTERVAL)
+		sched_pc->interval[0] = sched->interval[0];
+	if (set_mask & VXSM_FILL_RATE2)
+		sched_pc->fill_rate[1] = sched->fill_rate[1];
+	if (set_mask & VXSM_INTERVAL2)
+		sched_pc->interval[1] = sched->interval[1];
+	if (set_mask & VXSM_TOKENS)
+		sched_pc->tokens = sched->tokens;
+	if (set_mask & VXSM_TOKENS_MIN)
+		sched_pc->tokens_min = sched->tokens_min;
+	if (set_mask & VXSM_TOKENS_MAX)
+		sched_pc->tokens_max = sched->tokens_max;
+
+	if (set_mask & VXSM_IDLE_TIME)
+		sched_pc->flags |= VXSF_IDLE_TIME;
+	else
+		sched_pc->flags &= ~VXSF_IDLE_TIME;
+
+	/* reset time */
+	sched_pc->norm_time = jiffies;
+}
+
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret < 0 : on hold, check delta_min[]
+ *	     -1 only jiffies
+ *	     -2 also idle time
+ *
+ */
+int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
+	unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
+{
+	long delta;
+	long tokens = 0;
+	int flags = sched_pc->flags;
+
+	/* how much time did pass? */
+	delta = *norm_time - sched_pc->norm_time;
+	vxd_check_range(delta, 0, INT_MAX);
+
+	if (delta >= sched_pc->interval[0]) {
+		long tokens, integral;
+
+		/* calc integral token part */
+		tokens = delta / sched_pc->interval[0];
+		integral = tokens * sched_pc->interval[0];
+		tokens *= sched_pc->fill_rate[0];
+#ifdef CONFIG_VSERVER_HARDCPU
+		delta_min[0] = delta - integral;
+		vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
+#endif
+		/* advance time */
+		sched_pc->norm_time += delta;
+
+		/* add tokens */
+		sched_pc->tokens += tokens;
+		sched_pc->token_time += tokens;
+	}
+	else
+		delta_min[0] = delta;
+
+#ifdef CONFIG_VSERVER_IDLETIME
+	if (!(flags & VXSF_IDLE_TIME))
+		goto skip_idle;
+
+	/* how much was the idle skip? */
+	delta = *idle_time - sched_pc->idle_time;
+	vxd_check_range(delta, 0, INT_MAX);
+
+	if (delta >= sched_pc->interval[1]) {
+		long tokens, integral;
+
+		/* calc fair share token part */
+		tokens = delta / sched_pc->interval[1];
+		integral = tokens * sched_pc->interval[1];
+		tokens *= sched_pc->fill_rate[1];
+		delta_min[1] = delta - integral;
+		vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);
+
+		/* advance idle time */
+		sched_pc->idle_time += integral;
+
+		/* add tokens */
+		sched_pc->tokens += tokens;
+		sched_pc->token_time += tokens;
+	}
+	else
+		delta_min[1] = delta;
+skip_idle:
+#endif
+
+	/* clip at maximum */
+	if (sched_pc->tokens > sched_pc->tokens_max)
+		sched_pc->tokens = sched_pc->tokens_max;
+	tokens = sched_pc->tokens;
+
+	if ((flags & VXSF_ONHOLD)) {
+		/* can we unhold? */
+		if (tokens >= sched_pc->tokens_min) {
+			flags &= ~VXSF_ONHOLD;
+			sched_pc->hold_ticks +=
+				*norm_time - sched_pc->onhold;
+		}
+		else
+			goto on_hold;
+	} else {
+		/* put on hold? */
+		if (tokens <= 0) {
+			flags |= VXSF_ONHOLD;
+			sched_pc->onhold = *norm_time;
+			goto on_hold;
+		}
+	}
+	sched_pc->flags = flags;
+	return tokens;
+
+on_hold:
+	tokens = sched_pc->tokens_min - tokens;
+	sched_pc->flags = flags;
+	BUG_ON(tokens < 0);
+
+#ifdef CONFIG_VSERVER_HARDCPU
+	if (likely(tokens))
+		delta_min[0] = sched_pc->interval[0] *
+			tokens / sched_pc->fill_rate[0] -
+			delta_min[0];
+	else
+		delta_min[0] = sched_pc->interval[0] -
+			delta_min[0];
+	vxd_check_range(delta_min[0], 0, INT_MAX);
+
+#ifdef CONFIG_VSERVER_IDLETIME
+	if (!(flags & VXSF_IDLE_TIME))
+		return -1;
+
+	if (likely(tokens))
+		delta_min[1] = sched_pc->interval[1] *
+			tokens / sched_pc->fill_rate[1] -
+			delta_min[1];
+	else
+		delta_min[1] = sched_pc->interval[1] -
+			delta_min[1];
+	vxd_check_range(delta_min[0], 0, INT_MAX);
+
+	return -2;
+#else
+	return -1;
+#endif	/* CONFIG_VSERVER_IDLETIME */
+#else
+	return 0;
+#endif	/* CONFIG_VSERVER_HARDCPU */
+}
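In other words, the bucket refills in whole interval[0] steps: after delta jiffies a context gains (delta / interval[0]) * fill_rate[0] tokens, the left-over jiffies are reported in delta_min[0], and once the context is on hold the same routine answers "how many more jiffies until tokens_min is reached". A standalone model of just that arithmetic, with made-up example numbers (not values taken from the patch):

#include <stdio.h>

/* toy model of the integral refill step and the on-hold delay in vx_tokens_recalc() */
int main(void)
{
	long fill_rate = 1, interval = 4;	/* 1 token per 4 jiffies => ~25% of one CPU */
	long tokens = 0, tokens_min = 10;
	long delta = 23;			/* jiffies since the last recalculation      */

	long steps = delta / interval;		/* 5 whole intervals             */
	long integral = steps * interval;	/* 20 jiffies actually accounted */
	long remainder = delta - integral;	/* 3 jiffies carried over        */
	tokens += steps * fill_rate;		/* +5 tokens                     */

	/* still on hold: jiffies until tokens_min tokens have accumulated */
	long shortfall = tokens_min - tokens;			   /* 5 tokens missing */
	long wait = interval * shortfall / fill_rate - remainder;  /* 17 jiffies       */

	printf("tokens=%ld carry=%ld wait=%ld\n", tokens, remainder, wait);
	return 0;
}

Because vx_need_resched() consumes one token per timer tick while the context runs, fill_rate/interval is effectively the long-term CPU share, tokens_max bounds how large a burst can get, and tokens_min decides how full the bucket must be before a held context is allowed back on the runqueue.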
+
+
+int do_set_sched(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
+{
+	unsigned int set_mask = data->set_mask;
+
+	/* Sanity check data values */
+	if (data->fill_rate < 0)
+		data->fill_rate = 1;
+	if (data->interval <= 0)
+		data->interval = HZ;
+	if (data->tokens_max <= 0)
+		data->tokens_max = HZ;
+	if (data->tokens_min < 0)
+		data->tokens_min = data->fill_rate*3;
+	if (data->tokens_min >= data->tokens_max)
+		data->tokens_min = data->tokens_max;
+
+	if (data->prio_bias > MAX_PRIO_BIAS)
+		data->prio_bias = MAX_PRIO_BIAS;
+	if (data->prio_bias < MIN_PRIO_BIAS)
+		data->prio_bias = MIN_PRIO_BIAS;
+
+	spin_lock(&vxi->sched.tokens_lock);
+
+	if (set_mask & VXSM_FILL_RATE)
+		vxi->sched.fill_rate[0] = data->fill_rate;
+	if (set_mask & VXSM_INTERVAL)
+		vxi->sched.interval[0] = data->interval;
+	if (set_mask & VXSM_FILL_RATE2)
+		vxi->sched.fill_rate[1] = data->fill_rate;
+	if (set_mask & VXSM_INTERVAL2)
+		vxi->sched.interval[1] = data->interval;
+	if (set_mask & VXSM_TOKENS)
+		vxi->sched.tokens = data->tokens;
+	if (set_mask & VXSM_TOKENS_MIN)
+		vxi->sched.tokens_min = data->tokens_min;
+	if (set_mask & VXSM_TOKENS_MAX)
+		vxi->sched.tokens_max = data->tokens_max;
+	if (set_mask & VXSM_PRIO_BIAS)
+		vxi->sched.prio_bias = data->prio_bias;
+
+#ifdef CONFIG_SMP
+	vxi->sched.update_mask = set_mask;
+	rmb();
+	if (set_mask & VXSM_CPU_ID)
+		vxi->sched.update = cpumask_of_cpu(data->cpu_id);
+	else
+		vxi->sched.update = CPU_MASK_ALL;
+#else
+	/* on UP we update immediately */
+	vx_update_sched_param(&vxi->sched,
+		&vx_per_cpu(vxi, sched_pc, 0));
+#endif
+
+	spin_unlock(&vxi->sched.tokens_lock);
+	return 0;
+}
+
+
+#ifdef CONFIG_VSERVER_LEGACY
+
+#define COPY_MASK_V2(name, mask)			\
+	if (vc_data.name != SCHED_KEEP) {		\
+		vc_data_v4.name = vc_data.name;		\
+		vc_data_v4.set_mask |= mask;		\
+	}
+
+int vc_set_sched_v2(uint32_t xid, void __user *data)
+{
+	struct vcmd_set_sched_v2 vc_data;
+	struct vcmd_set_sched_v4 vc_data_v4 = { .set_mask = 0 };
+	struct vx_info *vxi;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = lookup_vx_info(xid);
+	if (!vxi)
+		return -ESRCH;
+
+	COPY_MASK_V2(fill_rate, VXSM_FILL_RATE);
+	COPY_MASK_V2(interval, VXSM_INTERVAL);
+	COPY_MASK_V2(tokens, VXSM_TOKENS);
+	COPY_MASK_V2(tokens_min, VXSM_TOKENS_MIN);
+	COPY_MASK_V2(tokens_max, VXSM_TOKENS_MAX);
+	vc_data_v4.bucket_id = 0;
+
+	do_set_sched(vxi, &vc_data_v4);
+	put_vx_info(vxi);
+	return 0;
+}
+#endif
+
+int vc_set_sched_v3(uint32_t xid, void __user *data)
+{
+	struct vcmd_set_sched_v3 vc_data;
+	struct vcmd_set_sched_v4 vc_data_v4;
+	struct vx_info *vxi;
+	int ret;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = lookup_vx_info(xid);
+	if (!vxi)
+		return -ESRCH;
+
+	/* structures are binary compatible */
+	memcpy(&vc_data_v4, &vc_data, sizeof(vc_data));
+	vc_data_v4.set_mask &= VXSM_V3_MASK;
+	vc_data_v4.bucket_id = 0;
+	ret = do_set_sched(vxi, &vc_data_v4);
+	put_vx_info(vxi);
+	return ret;
+}
+
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+	struct vcmd_set_sched_v4 vc_data;
+	struct vx_info *vxi;
+	int ret;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = lookup_vx_info(xid);
+	if (!vxi)
+		return -ESRCH;
+
+	ret = do_set_sched(vxi, &vc_data);
+	put_vx_info(vxi);
+	return ret;
+}
+
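The legacy v2 entry point only forwards fields the caller did not mark as SCHED_KEEP. For instance, COPY_MASK_V2(fill_rate, VXSM_FILL_RATE) expands inside vc_set_sched_v2() to:

	if (vc_data.fill_rate != SCHED_KEEP) {
		vc_data_v4.fill_rate = vc_data.fill_rate;
		vc_data_v4.set_mask |= VXSM_FILL_RATE;
	}

so an untouched field neither overwrites the stored value nor sets its bit in set_mask, and do_set_sched() leaves that parameter alone.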
--- linux-2.6.16-rc4/kernel/vserver/signal.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/vserver/signal.c	2006-02-17 23:26:33 +0100
@@ -0,0 +1,132 @@
+/*
+ *  linux/kernel/vserver/signal.c
+ *
+ *  Virtual Server: Signal Support
+ *
+ *  Copyright (C) 2003-2005  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext V0.05
+ *
+ */
+
+#include
+
+#include
+#include
+
+#include
+#include
+
+
+int vx_info_kill(struct vx_info *vxi, int pid, int sig)
+{
+	int retval, count=0;
+	struct task_struct *p;
+	unsigned long priv = 0;
+
+	retval = -ESRCH;
+	vxdprintk(VXD_CBIT(misc, 4),
+		"vx_info_kill(%p[#%d],%d,%d)*",
+		vxi, vxi->vx_id, pid, sig);
+	read_lock(&tasklist_lock);
+	switch (pid) {
+	case  0:
+		priv = 1;
+	case -1:
+		for_each_process(p) {
+			int err = 0;
+
+			if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
+				(pid && vxi->vx_initpid == p->pid))
+				continue;
+
+			err = group_send_sig_info(sig, (void*)priv, p);
+			++count;
+			if (err != -EPERM)
+				retval = err;
+		}
+		break;
+
+	case 1:
+		if (vxi->vx_initpid) {
+			pid = vxi->vx_initpid;
+			priv = 1;
+		}
+		/* fallthrough */
+	default:
+		p = find_task_by_real_pid(pid);
+		if (p) {
+			if (vx_task_xid(p) == vxi->vx_id)
+				retval = group_send_sig_info(sig,
+					(void*)priv, p);
+		}
+		break;
+	}
+	read_unlock(&tasklist_lock);
+	vxdprintk(VXD_CBIT(misc, 4),
+		"vx_info_kill(%p[#%d],%d,%d) = %d",
+		vxi, vxi->vx_id, pid, sig, retval);
+	return retval;
+}
+
+int vc_ctx_kill(uint32_t id, void __user *data)
+{
+	int retval;
+	struct vcmd_ctx_kill_v0 vc_data;
+	struct vx_info *vxi;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	vxi = lookup_vx_info(id);
+	if (!vxi)
+		return -ESRCH;
+
+	retval = vx_info_kill(vxi, vc_data.pid, vc_data.sig);
+	put_vx_info(vxi);
+	return retval;
+}
+
+
+static int __wait_exit(struct vx_info *vxi)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int ret = 0;
+
+	add_wait_queue(&vxi->vx_wait, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+wait:
+	if (vx_info_state(vxi, VXS_SHUTDOWN|VXS_HASHED) == VXS_SHUTDOWN)
+		goto out;
+	if (signal_pending(current)) {
+		ret = -ERESTARTSYS;
+		goto out;
+	}
+	schedule();
+	goto wait;
+
+out:
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vxi->vx_wait, &wait);
+	return ret;
+}
+
+
+
+int vc_wait_exit(uint32_t id, void __user *data)
+{
+	struct vx_info *vxi;
+	int ret;
+
+	vxi = lookup_vx_info(id);
+	if (!vxi)
+		return -ESRCH;
+
+	ret = __wait_exit(vxi);
+	put_vx_info(vxi);
+	return ret;
+}
+
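__wait_exit() above open-codes an interruptible wait for the context to reach the shutdown state. The same logic could also be written with the stock wait_event_interruptible() helper; this is an equivalent formulation for illustration, not what the patch actually uses:

static int __wait_exit(struct vx_info *vxi)
{
	/* sleeps on vxi->vx_wait until the context is shut down and unhashed,
	   or returns -ERESTARTSYS if a signal arrives first */
	return wait_event_interruptible(vxi->vx_wait,
		vx_info_state(vxi, VXS_SHUTDOWN|VXS_HASHED) == VXS_SHUTDOWN);
}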
--- linux-2.6.16-rc4/kernel/sched_hard.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.16-rc4-vs2.1.1-rc8/kernel/sched_hard.h	2006-02-17 23:26:33 +0100
@@ -0,0 +1,315 @@
+#ifndef _VX_SCHED_HARD_H
+#define _VX_SCHED_HARD_H
+
+#ifndef CONFIG_VSERVER
+#warning config options missing
+#endif
+
+
+#ifdef CONFIG_VSERVER_IDLELIMIT
+
+/*
+ * vx_idle_resched - reschedule after maxidle
+ */
+static inline
+void vx_idle_resched(runqueue_t *rq)
+{
+	if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
+		set_need_resched();
+}
+
+#else /* !CONFIG_VSERVER_IDLELIMIT */
+
+#define vx_idle_resched(rq)
+
+#endif /* CONFIG_VSERVER_IDLELIMIT */
+
+
+
+#ifdef CONFIG_VSERVER_IDLETIME
+
+#define vx_set_rq_min_skip(rq, min)		\
+	(rq)->idle_skip = (min)
+
+#define vx_save_min_skip(ret, min, val)		\
+	__vx_save_min_skip(ret, min, val)
+
+static inline
+void __vx_save_min_skip(int ret, int *min, int val)
+{
+	if (ret > -2)
+		return;
+	if ((*min > val) || !*min)
+		*min = val;
+}
+
+static inline
+int vx_try_skip(runqueue_t *rq)
+{
+	/* artificially advance time */
+	if (rq->idle_skip && !list_empty(&rq->hold_queue)) {
+		rq->idle_time += rq->idle_skip;
+		return 1;
+	}
+	return 0;
+}
+
+#else /* !CONFIG_VSERVER_IDLETIME */
+
+#define vx_set_rq_min_skip(rq, min)		\
+	({ int dummy = (min); dummy; })
+
+#define vx_save_min_skip(ret, min, val)
+
+static inline
+int vx_try_skip(runqueue_t *rq)
+{
+	return 0;
+}
+
+#endif /* CONFIG_VSERVER_IDLETIME */
+
+
+
+#ifdef CONFIG_VSERVER_HARDCPU
+
+#define vx_set_rq_max_idle(rq, max)		\
+	(rq)->idle_tokens = (max)
+
+#define vx_save_max_idle(ret, min, val)		\
+	__vx_save_max_idle(ret, min, val)
+
+static inline
+void __vx_save_max_idle(int ret, int *min, int val)
+{
+	if (*min > val)
+		*min = val;
+}
+
+
+/*
+ * vx_hold_task - put a task on the hold queue
+ */
+static inline
+void vx_hold_task(struct task_struct *p, runqueue_t *rq)
+{
+	__deactivate_task(p, rq);
+	p->state |= TASK_ONHOLD;
+	/* a new one on hold */
+	rq->nr_onhold++;
+	list_add_tail(&p->run_list, &rq->hold_queue);
+}
+
+/*
+ * vx_unhold_task - put a task back to the runqueue
+ */
+static inline
+void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
+{
+	list_del(&p->run_list);
+	/* one less waiting */
+	rq->nr_onhold--;
+	p->state &= ~TASK_ONHOLD;
+	enqueue_task(p, rq->expired);
+	rq->nr_running++;
+
+	if (p->static_prio < rq->best_expired_prio)
+		rq->best_expired_prio = p->static_prio;
+}
+
+unsigned long nr_onhold(void)
+{
+	unsigned long i, sum = 0;
+
+	for_each_online_cpu(i)
+		sum += cpu_rq(i)->nr_onhold;
+
+	return sum;
+}
+
+
+
+static inline
+int __vx_tokens_avail(struct _vx_sched_pc *sched_pc)
+{
+	return sched_pc->tokens;
+}
+
+static inline
+void __vx_consume_token(struct _vx_sched_pc *sched_pc)
+{
+	sched_pc->tokens--;
+}
+
+static inline
+int vx_need_resched(struct task_struct *p, int slice, int cpu)
+{
+	struct vx_info *vxi = p->vx_info;
+
+	if (vxi) {
+		struct _vx_sched_pc *sched_pc =
+			&vx_per_cpu(vxi, sched_pc, cpu);
+		int tokens;
+
+		/* maybe we can simplify that to decrement
+		   the token counter unconditional? */
+
+		if ((tokens = __vx_tokens_avail(sched_pc)) > 0)
+			__vx_consume_token(sched_pc);
+
+		/* for tokens > 0, one token was consumed */
+		if (tokens < 2)
+			return 1;
+	}
+	return (slice == 0);
+}
+
+
+#define vx_set_rq_time(rq, time) do {	\
+	rq->norm_time = time;		\
+} while (0)
+
+
+static inline
+void vx_try_unhold(runqueue_t *rq, int cpu)
+{
+	struct vx_info *vxi = NULL;
+	struct list_head *l, *n;
+	int maxidle = HZ;
+	int minskip = 0;
+
+	/* nothing to do? */
+	if (list_empty(&rq->hold_queue))
+		return;
+
+	list_for_each_safe(l, n, &rq->hold_queue) {
+		int ret, delta_min[2];
+		struct _vx_sched_pc *sched_pc;
+		struct task_struct *p;
+
+		p = list_entry(l, task_t, run_list);
+		if (vxi == p->vx_info)
+			continue;
+
+		vxi = p->vx_info;
+		/* ignore paused contexts */
+		if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
+			continue;
+
+		sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
+
+		/* recalc tokens */
+		ret = vx_tokens_recalc(sched_pc,
+			&rq->norm_time, &rq->idle_time, delta_min);
+
+		if (ret > 0) {
+			/* we found a runable context */
+			vx_unhold_task(p, rq);
+			break;
+		}
+		vx_save_max_idle(ret, &maxidle, delta_min[0]);
+		vx_save_min_skip(ret, &minskip, delta_min[1]);
+	}
+	vx_set_rq_max_idle(rq, maxidle);
+	vx_set_rq_min_skip(rq, minskip);
+}
+
+
+static inline
+int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
+{
+	struct vx_info *vxi = next->vx_info;
+	struct _vx_sched_pc *sched_pc;
+	int delta_min[2];
+	int flags, ret;
+
+	if (!vxi)
+		return 1;
+
+	flags = vxi->vx_flags;
+
+	if (unlikely(vx_check_flags(flags , VXF_SCHED_PAUSE, 0)))
+		goto put_on_hold;
+	if (!vx_check_flags(flags , VXF_SCHED_HARD|VXF_SCHED_PRIO, 0))
+		return 1;
+
+	sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
+#ifdef CONFIG_SMP
+	/* update scheduler params */
+	if (cpu_isset(cpu, vxi->sched.update)) {
+		vx_update_sched_param(&vxi->sched, sched_pc);
+		cpu_clear(cpu, vxi->sched.update);
+	}
+#endif
+	ret = vx_tokens_recalc(sched_pc,
+		&rq->norm_time, &rq->idle_time, delta_min);
+
+	if (!vx_check_flags(flags , VXF_SCHED_HARD, 0))
+		return 1;
+
+	if (unlikely(ret < 0)) {
+		vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]);
+		vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]);
+put_on_hold:
+		vx_hold_task(next, rq);
+		return 0;
+	}
+	return 1;
+}
+
+
+#else /* CONFIG_VSERVER_HARDCPU */
+
+static inline
+void vx_hold_task(struct task_struct *p, runqueue_t *rq)
+{
+	return;
+}
+
+static inline
+void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
+{
+	return;
+}
+
+unsigned long nr_onhold(void)
+{
+	return 0;
+}
+
+
+static inline
+int vx_need_resched(struct task_struct *p, int slice, int cpu)
+{
+	return (slice == 0);
+}
+
+
+#define vx_set_rq_time(rq, time)
+
+static inline
+void vx_try_unhold(runqueue_t *rq, int cpu)
+{
+	return;
+}
+
+static inline
+int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
+{
+	struct vx_info *vxi = next->vx_info;
+	struct _vx_sched_pc *sched_pc;
+	int delta_min[2];
+	int ret;
+
+	if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
+		return 1;
+
+	sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
+	ret = vx_tokens_recalc(sched_pc,
+		&rq->norm_time, &rq->idle_time, delta_min);
+	return 1;
+}
+
+#endif /* CONFIG_VSERVER_HARDCPU */
+
+#endif /* _VX_SCHED_HARD_H */
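Tying the pieces together: once VXF_SCHED_HARD is set for a context, its long-term share of one CPU is roughly fill_rate/interval, because vx_need_resched() burns one token per timer tick and vx_tokens_recalc() refills fill_rate tokens every interval jiffies; tokens_max caps bursts and tokens_min is the refill level a held context must reach before vx_try_unhold() puts it back on a runqueue. A hedged illustration of the parameters a management tool would hand to vc_set_sched() for a ~30% cap; the vcmd_set_sched_v4 layout and the syscall plumbing live in headers outside this excerpt, so the field names below are taken only from their use in do_set_sched() and the values are arbitrary examples:

	/* schematic only: struct definition and dispatch are not part of this excerpt */
	struct vcmd_set_sched_v4 sched = {
		.set_mask   = VXSM_FILL_RATE | VXSM_INTERVAL |
			      VXSM_TOKENS | VXSM_TOKENS_MIN | VXSM_TOKENS_MAX,
		.fill_rate  = 3,	/* refill 3 tokens ...             */
		.interval   = 10,	/* ... every 10 jiffies => ~30%    */
		.tokens     = 100,	/* initial bucket contents         */
		.tokens_min = 30,	/* refill level needed to unhold   */
		.tokens_max = 200,	/* burst limit (bucket capacity)   */
	};
	/* passed to vc_set_sched(xid, &sched) by the vserver syscall layer */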