diff -NurpP --minimal linux-2.6.31.2-vs2.3.0.36.14-pre12/include/linux/sched.h linux-2.6.31.2-vs2.3.0.36.14/include/linux/sched.h
--- linux-2.6.31.2-vs2.3.0.36.14-pre12/include/linux/sched.h	2009-10-06 02:59:08.000000000 +0200
+++ linux-2.6.31.2-vs2.3.0.36.14/include/linux/sched.h	2009-10-06 05:02:05.000000000 +0200
@@ -182,13 +182,12 @@ extern unsigned long long time_sync_thre
 #define TASK_UNINTERRUPTIBLE	2
 #define __TASK_STOPPED		4
 #define __TASK_TRACED		8
-#define TASK_ONHOLD		16
 /* in tsk->exit_state */
-#define EXIT_ZOMBIE		32
-#define EXIT_DEAD		64
+#define EXIT_ZOMBIE		16
+#define EXIT_DEAD		32
 /* in tsk->state again */
-#define TASK_DEAD		128
-#define TASK_WAKEKILL		256
+#define TASK_DEAD		64
+#define TASK_WAKEKILL		128
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -1187,9 +1186,7 @@ struct task_struct {
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
-#ifdef CONFIG_VSERVER_HARDCPU
-	struct list_head hq;
-#endif
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* list of struct preempt_notifier: */
 	struct hlist_head preempt_notifiers;
diff -NurpP --minimal linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched.c linux-2.6.31.2-vs2.3.0.36.14/kernel/sched.c
--- linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched.c	2009-09-27 00:59:43.000000000 +0200
+++ linux-2.6.31.2-vs2.3.0.36.14/kernel/sched.c	2009-10-06 05:02:21.000000000 +0200
@@ -641,16 +641,6 @@ struct rq {
 #endif
 	struct hrtimer hrtick_timer;
 #endif
-	unsigned long norm_time;
-	unsigned long idle_time;
-#ifdef CONFIG_VSERVER_IDLETIME
-	int idle_skip;
-#endif
-#ifdef CONFIG_VSERVER_HARDCPU
-	struct list_head hold_queue;
-	unsigned long nr_onhold;
-	int idle_tokens;
-#endif
 
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
@@ -1926,8 +1916,6 @@ static inline void check_class_changed(s
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
-#include "sched_mon.h"
-
 #ifdef CONFIG_SMP
 
 /* Used instead of source_load when we know the type == 0 */
@@ -2020,7 +2008,6 @@ migrate_task(struct task_struct *p, int
 {
 	struct rq *rq = task_rq(p);
 
-	vxm_migrate_task(p, rq, dest_cpu);
 	/*
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
@@ -2413,8 +2400,6 @@ void task_oncpu_function_call(struct tas
 		preempt_enable();
 }
 
-#include "sched_hard.h"
-
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2459,13 +2444,6 @@ static int try_to_wake_up(struct task_st
 	rq = task_rq_lock(p, &flags);
 	update_rq_clock(rq);
 	old_state = p->state;
-
-	/* we need to unhold suspended tasks */
-	if (old_state & TASK_ONHOLD) {
-		vx_unhold_task(p, rq);
-		old_state = p->state;
-	}
-
 	if (!(old_state & state))
 		goto out;
 
@@ -2487,12 +2465,6 @@ static int try_to_wake_up(struct task_st
 	/* might preempt at this point */
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
-
-	/* we need to unhold suspended tasks
-	if (old_state & TASK_ONHOLD) {
-		vx_unhold_task(p, rq);
-		old_state = p->state;
-	} */
 	if (!(old_state & state))
 		goto out;
 	if (p->se.on_rq)
@@ -5401,11 +5373,6 @@ need_resched_nonpreemptible:
 		idle_balance(cpu, rq);
 
 	put_prev_task(rq, prev);
-
-	vx_set_rq_time(rq, jiffies);	/* update time */
-	vx_schedule(prev, rq, cpu);	/* hold if over limit */
-	vx_try_unhold(rq, cpu);		/* unhold if refilled */
-
 	next = pick_next_task(rq);
 
 	if (likely(prev != next)) {
@@ -9362,10 +9329,7 @@ void __init sched_init(void)
 #endif
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-#ifdef CONFIG_VSERVER_HARDCPU
-		INIT_LIST_HEAD(&rq->hold_queue);
-		rq->nr_onhold = 0;
-#endif
+
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
diff -NurpP --minimal linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched_hard.h linux-2.6.31.2-vs2.3.0.36.14/kernel/sched_hard.h
--- linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched_hard.h	2009-09-10 16:11:43.000000000 +0200
+++ linux-2.6.31.2-vs2.3.0.36.14/kernel/sched_hard.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,353 +0,0 @@
-
-#ifdef CONFIG_VSERVER_IDLELIMIT
-
-/*
- * vx_idle_resched - reschedule after maxidle
- */
-static inline
-void vx_idle_resched(struct rq *rq)
-{
-	/* maybe have a better criterion for paused */
-	if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
-		set_need_resched();
-}
-
-#else /* !CONFIG_VSERVER_IDLELIMIT */
-
-#define vx_idle_resched(rq)
-
-#endif /* CONFIG_VSERVER_IDLELIMIT */
-
-
-
-#ifdef CONFIG_VSERVER_IDLETIME
-
-#define vx_set_rq_min_skip(rq, min)		\
-	(rq)->idle_skip = (min)
-
-#define vx_save_min_skip(ret, min, val)		\
-	__vx_save_min_skip(ret, min, val)
-
-static inline
-void __vx_save_min_skip(int ret, int *min, int val)
-{
-	if (ret > -2)
-		return;
-	if ((*min > val) || !*min)
-		*min = val;
-}
-
-static inline
-int vx_try_skip(struct rq *rq, int cpu)
-{
-	/* artificially advance time */
-	if (rq->idle_skip > 0) {
-		vxdprintk(list_empty(&rq->hold_queue),
-			"hold queue empty on cpu %d", cpu);
-		rq->idle_time += rq->idle_skip;
-		vxm_idle_skip(rq, cpu);
-		return 1;
-	}
-	return 0;
-}
-
-#else /* !CONFIG_VSERVER_IDLETIME */
-
-#define vx_set_rq_min_skip(rq, min)		\
-	({ int dummy = (min); dummy; })
-
-#define vx_save_min_skip(ret, min, val)
-
-static inline
-int vx_try_skip(struct rq *rq, int cpu)
-{
-	return 0;
-}
-
-#endif /* CONFIG_VSERVER_IDLETIME */
-
-
-
-#ifdef CONFIG_VSERVER_HARDCPU
-
-#define vx_set_rq_max_idle(rq, max)		\
-	(rq)->idle_tokens = (max)
-
-#define vx_save_max_idle(ret, min, val)		\
-	__vx_save_max_idle(ret, min, val)
-
-static inline
-void __vx_save_max_idle(int ret, int *min, int val)
-{
-	if (*min > val)
-		*min = val;
-}
-
-
-/*
- * vx_hold_task - put a task on the hold queue
- */
-static inline
-void vx_hold_task(struct task_struct *p, struct rq *rq)
-{
-	// printk("@ hold_task(%p[%lx])\n", p, p->state);
-
-	/* ignore dead/killed tasks */
-	if (unlikely(p->state & (TASK_DEAD | TASK_WAKEKILL)))
-		return;
-
-	/* ignore sleeping tasks */
-	if (unlikely(p->state & TASK_NORMAL))
-		return;
-
-	/* remove task from runqueue */
-	if (likely(p->se.on_rq))
-		dequeue_task(rq, p, 0);
-	else
-		printk("@ woops, task %p not on runqueue?\n", p);
-
-	p->state |= TASK_ONHOLD;
-	/* a new one on hold */
-	rq->nr_onhold++;
-	vxm_hold_task(p, rq);
-	list_add_tail(&p->hq, &rq->hold_queue);
-	// list_add_tail(&p->run_list, &rq->hold_queue);
-}
-
-/*
- * vx_unhold_task - put a task back to the runqueue
- */
-static inline
-void vx_unhold_task(struct task_struct *p, struct rq *rq)
-{
-	// printk("@ unhold_task(%p[%lx])\n", p, p->state);
-	list_del_init(&p->hq);
-	// list_del(&p->run_list);
-	/* one less waiting */
-	rq->nr_onhold--;
-	p->state &= ~TASK_ONHOLD;
-	enqueue_task(rq, p, 0);
-	// ? inc_nr_running(p, rq);
-	vxm_unhold_task(p, rq);
-}
-
-/*
- * vx_remove_hold - remove a task from the hold queue
- */
-static inline
-void vx_remove_hold(struct task_struct *p, struct rq *rq)
-{
-	printk("@ remove_hold(%p[%lx])\n", p, p->state);
-	list_del_init(&p->hq);
-	// list_del(&p->run_list);
-	/* one less waiting */
-	rq->nr_onhold--;
-	p->state &= ~TASK_ONHOLD;
-}
-
-unsigned long nr_onhold(void)
-{
-	unsigned long i, sum = 0;
-
-	for_each_online_cpu(i)
-		sum += cpu_rq(i)->nr_onhold;
-
-	return sum;
-}
-
-
-
-static inline
-int __vx_tokens_avail(struct _vx_sched_pc *sched_pc)
-{
-	return sched_pc->tokens;
-}
-
-static inline
-void __vx_consume_token(struct _vx_sched_pc *sched_pc)
-{
-	sched_pc->tokens--;
-}
-
-static inline
-int vx_need_resched(struct task_struct *p, int slice, int cpu)
-{
-	struct vx_info *vxi = p->vx_info;
-
-	if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) {
-		struct _vx_sched_pc *sched_pc =
-			&vx_per_cpu(vxi, sched_pc, cpu);
-		int tokens;
-
-		/* maybe we can simplify that to decrement
-		   the token counter unconditional? */
-
-		if ((tokens = __vx_tokens_avail(sched_pc)) > 0)
-			__vx_consume_token(sched_pc);
-
-		/* for tokens > 0, one token was consumed */
-		if (tokens < 2)
-			slice = 0;
-	}
-	vxm_need_resched(p, slice, cpu);
-	return (slice == 0);
-}
-
-
-#define vx_set_rq_time(rq, time) do {	\
-	rq->norm_time = time;		\
-} while (0)
-
-
-static inline
-void vx_try_unhold(struct rq *rq, int cpu)
-{
-	struct vx_info *vxi = NULL;
-	struct list_head *l, *n;
-	int maxidle = HZ;
-	int minskip = 0;
-
-	/* nothing to do? what about pause? */
-	if (list_empty(&rq->hold_queue))
-		return;
-
-	list_for_each_safe(l, n, &rq->hold_queue) {
-		int ret, delta_min[2];
-		struct _vx_sched_pc *sched_pc;
-		struct task_struct *p;
-
-		p = list_entry(l, struct task_struct, hq);
-		/* don't bother with same context */
-		if (vxi == p->vx_info)
-			continue;
-
-		vxi = p->vx_info;
-		/* ignore paused contexts */
-		if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
-			continue;
-
-		sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
-
-		/* recalc tokens */
-		vxm_sched_info(sched_pc, vxi, cpu);
-		ret = vx_tokens_recalc(sched_pc,
-			&rq->norm_time, &rq->idle_time, delta_min);
-		vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
-
-		if (ret > 0) {
-			/* we found a runable context */
-			vx_unhold_task(p, rq);
-			break;
-		}
-		vx_save_max_idle(ret, &maxidle, delta_min[0]);
-		vx_save_min_skip(ret, &minskip, delta_min[1]);
-	}
-	vx_set_rq_max_idle(rq, maxidle);
-	vx_set_rq_min_skip(rq, minskip);
-	vxm_rq_max_min(rq, cpu);
-}
-
-
-static inline
-int vx_schedule(struct task_struct *next, struct rq *rq, int cpu)
-{
-	struct vx_info *vxi = next->vx_info;
-	struct _vx_sched_pc *sched_pc;
-	int delta_min[2];
-	int flags, ret;
-
-	if (!vxi)
-		return 1;
-
-	flags = vxi->vx_flags;
-
-	if (unlikely(vs_check_flags(flags, VXF_SCHED_PAUSE, 0)))
-		goto put_on_hold;
-	if (!vs_check_flags(flags, VXF_SCHED_HARD | VXF_SCHED_PRIO, 0))
-		return 1;
-
-	sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
-#ifdef CONFIG_SMP
-	/* update scheduler params */
-	if (cpu_isset(cpu, vxi->sched.update)) {
-		vx_update_sched_param(&vxi->sched, sched_pc);
-		vxm_update_sched(sched_pc, vxi, cpu);
-		cpu_clear(cpu, vxi->sched.update);
-	}
-#endif
-	vxm_sched_info(sched_pc, vxi, cpu);
-	ret = vx_tokens_recalc(sched_pc,
-		&rq->norm_time, &rq->idle_time, delta_min);
-	vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
-
-	if (!vs_check_flags(flags, VXF_SCHED_HARD, 0))
-		return 1;
-
-	if (unlikely(ret < 0)) {
-		vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]);
-		vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]);
-		vxm_rq_max_min(rq, cpu);
-	put_on_hold:
-		vx_hold_task(next, rq);
-		return 0;
-	}
-	return 1;
-}
-
-
-#else /* CONFIG_VSERVER_HARDCPU */
-
-static inline
-void vx_hold_task(struct task_struct *p, struct rq *rq)
-{
-	return;
-}
-
-static inline
-void vx_unhold_task(struct task_struct *p, struct rq *rq)
-{
-	return;
-}
-
-unsigned long nr_onhold(void)
-{
-	return 0;
-}
-
-
-static inline
-int vx_need_resched(struct task_struct *p, int slice, int cpu)
-{
-	return (slice == 0);
-}
-
-
-#define vx_set_rq_time(rq, time)
-
-static inline
-void vx_try_unhold(struct rq *rq, int cpu)
-{
-	return;
-}
-
-static inline
-int vx_schedule(struct task_struct *next, struct rq *rq, int cpu)
-{
-	struct vx_info *vxi = next->vx_info;
-	struct _vx_sched_pc *sched_pc;
-	int delta_min[2];
-	int ret;
-
-	if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
-		return 1;
-
-	sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
-	vxm_sched_info(sched_pc, vxi, cpu);
-	ret = vx_tokens_recalc(sched_pc,
-		&rq->norm_time, &rq->idle_time, delta_min);
-	vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
-	return 1;
-}
-
-#endif /* CONFIG_VSERVER_HARDCPU */
-
diff -NurpP --minimal linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched_mon.h linux-2.6.31.2-vs2.3.0.36.14/kernel/sched_mon.h
--- linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/sched_mon.h	2009-09-10 16:11:43.000000000 +0200
+++ linux-2.6.31.2-vs2.3.0.36.14/kernel/sched_mon.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,200 +0,0 @@
-
-#include <linux/vserver/monitor.h>
-
-#ifdef CONFIG_VSERVER_MONITOR
-
-#ifdef CONFIG_VSERVER_HARDCPU
-#define HARDCPU(x) (x)
-#else
-#define HARDCPU(x) (0)
-#endif
-
-#ifdef CONFIG_VSERVER_IDLETIME
-#define IDLETIME(x) (x)
-#else
-#define IDLETIME(x) (0)
-#endif
-
-struct _vx_mon_entry *vxm_advance(int cpu);
-
-
-static inline
-void __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type)
-{
-	entry->type = type;
-	entry->xid = xid;
-}
-
-static inline
-void __vxm_sync(int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	__vxm_basic(entry, 0, VXM_SYNC);
-	entry->ev.sec = xtime.tv_sec;
-	entry->ev.nsec = xtime.tv_nsec;
-}
-
-static inline
-void __vxm_task(struct task_struct *p, int type)
-{
-	struct _vx_mon_entry *entry = vxm_advance(task_cpu(p));
-
-	__vxm_basic(entry, p->xid, type);
-	entry->ev.tsk.pid = p->pid;
-	entry->ev.tsk.state = p->state;
-}
-
-static inline
-void __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	__vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags));
-	entry->sd.tokens = s->tokens;
-	entry->sd.norm_time = s->norm_time;
-	entry->sd.idle_time = s->idle_time;
-}
-
-static inline
-void __vxm_rqinfo1(struct rq *q, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	entry->type = VXM_RQINFO_1;
-	entry->xid = ((unsigned long)q >> 16) & 0xffff;
-	entry->q1.running = q->nr_running;
-	entry->q1.onhold = HARDCPU(q->nr_onhold);
-	entry->q1.iowait = atomic_read(&q->nr_iowait);
-	entry->q1.uintr = q->nr_uninterruptible;
-	entry->q1.idle_tokens = IDLETIME(q->idle_tokens);
-}
-
-static inline
-void __vxm_rqinfo2(struct rq *q, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	entry->type = VXM_RQINFO_2;
-	entry->xid = (unsigned long)q & 0xffff;
-	entry->q2.norm_time = q->norm_time;
-	entry->q2.idle_time = q->idle_time;
-	entry->q2.idle_skip = IDLETIME(q->idle_skip);
-}
-
-static inline
-void __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE);
-	entry->ev.tokens = s->tokens;
-}
-
-static inline
-void __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1);
-	entry->u1.tokens_max = s->tokens_max;
-	entry->u1.fill_rate = s->fill_rate[0];
-	entry->u1.interval = s->interval[0];
-}
-
-static inline
-void __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
-{
-	struct _vx_mon_entry *entry = vxm_advance(cpu);
-
-	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2);
-	entry->u2.tokens_min = s->tokens_min;
-	entry->u2.fill_rate = s->fill_rate[1];
-	entry->u2.interval = s->interval[1];
-}
-
-
-#define vxm_activate_task(p,q)		__vxm_task(p, VXM_ACTIVATE)
-#define vxm_activate_idle(p,q)		__vxm_task(p, VXM_IDLE)
-#define vxm_deactivate_task(p,q)	__vxm_task(p, VXM_DEACTIVATE)
-#define vxm_hold_task(p,q)		__vxm_task(p, VXM_HOLD)
-#define vxm_unhold_task(p,q)		__vxm_task(p, VXM_UNHOLD)
-
-static inline
-void vxm_migrate_task(struct task_struct *p, struct rq *rq, int dest)
-{
-	__vxm_task(p, VXM_MIGRATE);
-	__vxm_rqinfo1(rq, task_cpu(p));
-	__vxm_rqinfo2(rq, task_cpu(p));
-}
-
-static inline
-void vxm_idle_skip(struct rq *rq, int cpu)
-{
-	__vxm_rqinfo1(rq, cpu);
-	__vxm_rqinfo2(rq, cpu);
-}
-
-static inline
-void vxm_need_resched(struct task_struct *p, int slice, int cpu)
-{
-	if (slice)
-		return;
-
-	__vxm_task(p, VXM_RESCHED);
-}
-
-static inline
-void vxm_sync(unsigned long now, int cpu)
-{
-	if (!CONFIG_VSERVER_MONITOR_SYNC ||
-	    (now % CONFIG_VSERVER_MONITOR_SYNC))
-		return;
-
-	__vxm_sync(cpu);
-}
-
-#define vxm_sched_info(s,v,c)		__vxm_sched(s,v,c)
-
-static inline
-void vxm_tokens_recalc(struct _vx_sched_pc *s, struct rq *rq,
-	struct vx_info *vxi, int cpu)
-{
-	__vxm_sched(s, vxi, cpu);
-	__vxm_rqinfo2(rq, cpu);
-}
-
-static inline
-void vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
-{
-	__vxm_sched(s, vxi, cpu);
-	__vxm_update(s, vxi, cpu);
-	__vxm_update1(s, vxi, cpu);
-	__vxm_update2(s, vxi, cpu);
-}
-
-static inline
-void vxm_rq_max_min(struct rq *rq, int cpu)
-{
-	__vxm_rqinfo1(rq, cpu);
-	__vxm_rqinfo2(rq, cpu);
-}
-
-#else /* CONFIG_VSERVER_MONITOR */
-
-#define vxm_activate_task(t,q)		do { } while (0)
-#define vxm_activate_idle(t,q)		do { } while (0)
-#define vxm_deactivate_task(t,q)	do { } while (0)
-#define vxm_hold_task(t,q)		do { } while (0)
-#define vxm_unhold_task(t,q)		do { } while (0)
-#define vxm_migrate_task(t,q,d)		do { } while (0)
-#define vxm_idle_skip(q,c)		do { } while (0)
-#define vxm_need_resched(t,s,c)		do { } while (0)
-#define vxm_sync(s,c)			do { } while (0)
-#define vxm_sched_info(s,v,c)		do { } while (0)
-#define vxm_tokens_recalc(s,q,v,c)	do { } while (0)
-#define vxm_update_sched(s,v,c)		do { } while (0)
-#define vxm_rq_max_min(q,c)		do { } while (0)
-
-#endif /* CONFIG_VSERVER_MONITOR */
-
diff -NurpP --minimal linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/timer.c linux-2.6.31.2-vs2.3.0.36.14/kernel/timer.c
--- linux-2.6.31.2-vs2.3.0.36.14-pre12/kernel/timer.c	2009-09-10 16:38:09.000000000 +0200
+++ linux-2.6.31.2-vs2.3.0.36.14/kernel/timer.c	2009-10-06 04:09:06.000000000 +0200
@@ -1148,25 +1148,6 @@ unsigned long get_next_timer_interrupt(u
 }
 #endif
 
-static inline
-void __vx_consume_token(struct _vx_sched_pc *sched_pc)
-{
-	sched_pc->tokens--;
-}
-
-static inline
-void vx_hard_tick(struct task_struct *p, int cpu)
-{
-	struct vx_info *vxi = p->vx_info;
-
-	if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) {
-		struct _vx_sched_pc *sched_pc =
-			&vx_per_cpu(vxi, sched_pc, cpu);
-
-		__vx_consume_token(sched_pc);
-	}
-}
-
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process. user_tick is 1 if the tick is user time, 0 for system.
@@ -1183,7 +1164,6 @@ void update_process_times(int user_tick)
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
 	scheduler_tick();
-	vx_hard_tick(p, cpu);
 	run_posix_cpu_timers(p);
 }
 
@@ -1436,10 +1416,6 @@ int do_sysinfo(struct sysinfo *info)
 	ktime_get_ts(&tp);
 	monotonic_to_bootbased(&tp);
-
-	if (vx_flags(VXF_VIRT_UPTIME, 0))
-		vx_vsi_uptime(&tp, NULL);
-
 	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
 	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);