--- linux-2.6.18.2/include/linux/vserver/monitor.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.18.2-vs2.1.1/include/linux/vserver/monitor.h	2006-10-07 17:50:13 +0200
@@ -0,0 +1,95 @@
+#ifndef _VX_MONITOR_H
+#define _VX_MONITOR_H
+
+
+enum {
+	VXM_UNUSED = 0,
+
+	VXM_SYNC = 0x10,
+
+	VXM_UPDATE = 0x20,
+	VXM_UPDATE_1,
+	VXM_UPDATE_2,
+
+	VXM_RQINFO_1 = 0x24,
+	VXM_RQINFO_2,
+
+	VXM_ACTIVATE = 0x40,
+	VXM_DEACTIVATE,
+	VXM_IDLE,
+
+	VXM_HOLD = 0x44,
+	VXM_UNHOLD,
+
+	VXM_MIGRATE = 0x48,
+	VXM_RESCHED,
+
+	/* all other bits are flags */
+	VXM_SCHED = 0x80,
+};
+
+struct _vxm_update_1 {
+	uint32_t tokens_max;
+	uint32_t fill_rate;
+	uint32_t interval;
+};
+
+struct _vxm_update_2 {
+	uint32_t tokens_min;
+	uint32_t fill_rate;
+	uint32_t interval;
+};
+
+struct _vxm_rqinfo_1 {
+	uint16_t running;
+	uint16_t onhold;
+	uint16_t iowait;
+	uint16_t uintr;
+	uint32_t idle_tokens;
+};
+
+struct _vxm_rqinfo_2 {
+	uint32_t norm_time;
+	uint32_t idle_time;
+	uint32_t idle_skip;
+};
+
+struct _vxm_sched {
+	uint32_t tokens;
+	uint32_t norm_time;
+	uint32_t idle_time;
+};
+
+struct _vxm_task {
+	uint16_t pid;
+	uint16_t state;
+};
+
+struct _vxm_event {
+	uint32_t jif;
+	union {
+		uint32_t seq;
+		uint32_t sec;
+	};
+	union {
+		uint32_t tokens;
+		uint32_t nsec;
+		struct _vxm_task tsk;
+	};
+};
+
+struct _vx_mon_entry {
+	uint16_t type;
+	uint16_t xid;
+	union {
+		struct _vxm_event ev;
+		struct _vxm_sched sd;
+		struct _vxm_update_1 u1;
+		struct _vxm_update_2 u2;
+		struct _vxm_rqinfo_1 q1;
+		struct _vxm_rqinfo_2 q2;
+	};
+};
+
+
+#endif /* _VX_MONITOR_H */
--- linux-2.6.18.2/kernel/sched.c	2006-11-04 19:43:24 +0100
+++ linux-2.6.18.2-vs2.1.1/kernel/sched.c	2006-10-28 19:04:30 +0200
@@ -3030,6 +3082,7 @@ void scheduler_tick(void)
 	struct rq *rq = cpu_rq(cpu);
 
 	update_cpu_clock(p, rq, now);
+	vxm_sync(now, cpu);
 
 	rq->timestamp_last_tick = now;
 
--- linux-2.6.18.2/kernel/sched_mon.h	1970-01-01 01:00:00 +0100
+++ linux-2.6.18.2-vs2.1.1/kernel/sched_mon.h	2006-10-15 04:18:52 +0200
@@ -0,0 +1,200 @@
+
+#include <linux/vserver/monitor.h>
+
+#ifdef CONFIG_VSERVER_MONITOR
+
+#ifdef CONFIG_VSERVER_HARDCPU
+#define HARDCPU(x)	(x)
+#else
+#define HARDCPU(x)	(0)
+#endif
+
+#ifdef CONFIG_VSERVER_IDLETIME
+#define IDLETIME(x)	(x)
+#else
+#define IDLETIME(x)	(0)
+#endif
+
+struct _vx_mon_entry *vxm_advance(int cpu);
+
+
+static inline
+void __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type)
+{
+	entry->type = type;
+	entry->xid = xid;
+}
+
+static inline
+void __vxm_sync(int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	__vxm_basic(entry, 0, VXM_SYNC);
+	entry->ev.sec = xtime.tv_sec;
+	entry->ev.nsec = xtime.tv_nsec;
+}
+
+static inline
+void __vxm_task(struct task_struct *p, int type)
+{
+	struct _vx_mon_entry *entry = vxm_advance(task_cpu(p));
+
+	__vxm_basic(entry, p->xid, type);
+	entry->ev.tsk.pid = p->pid;
+	entry->ev.tsk.state = p->state;
+}
+
+static inline
+void __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	__vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags));
+	entry->sd.tokens = s->tokens;
+	entry->sd.norm_time = s->norm_time;
+	entry->sd.idle_time = s->idle_time;
+}
+
+static inline
+void __vxm_rqinfo1(struct rq *q, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	entry->type = VXM_RQINFO_1;
+	entry->xid = ((unsigned long)q >> 16) & 0xffff;
+	entry->q1.running = q->nr_running;
+	entry->q1.onhold = HARDCPU(q->nr_onhold);
+	entry->q1.iowait = atomic_read(&q->nr_iowait);
+	entry->q1.uintr = q->nr_uninterruptible;
+	entry->q1.idle_tokens = IDLETIME(q->idle_tokens);
+}
+
+static inline
+void __vxm_rqinfo2(struct rq *q, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	entry->type = VXM_RQINFO_2;
+	entry->xid = (unsigned long)q & 0xffff;
+	entry->q2.norm_time = q->norm_time;
+	entry->q2.idle_time = q->idle_time;
+	entry->q2.idle_skip = IDLETIME(q->idle_skip);
+}
+
+static inline
+void __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE);
+	entry->ev.tokens = s->tokens;
+}
+
+static inline
+void __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1);
+	entry->u1.tokens_max = s->tokens_max;
+	entry->u1.fill_rate = s->fill_rate[0];
+	entry->u1.interval = s->interval[0];
+}
+
+static inline
+void __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
+{
+	struct _vx_mon_entry *entry = vxm_advance(cpu);
+
+	__vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2);
+	entry->u2.tokens_min = s->tokens_min;
+	entry->u2.fill_rate = s->fill_rate[1];
+	entry->u2.interval = s->interval[1];
+}
+
+
+#define vxm_activate_task(p,q)		__vxm_task(p, VXM_ACTIVATE)
+#define vxm_activate_idle(p,q)		__vxm_task(p, VXM_IDLE)
+#define vxm_deactivate_task(p,q)	__vxm_task(p, VXM_DEACTIVATE)
+#define vxm_hold_task(p,q)		__vxm_task(p, VXM_HOLD)
+#define vxm_unhold_task(p,q)		__vxm_task(p, VXM_UNHOLD)
+
+static inline
+void vxm_migrate_task(struct task_struct *p, struct rq *rq, int dest)
+{
+	__vxm_task(p, VXM_MIGRATE);
+	__vxm_rqinfo1(rq, task_cpu(p));
+	__vxm_rqinfo2(rq, task_cpu(p));
+}
+
+static inline
+void vxm_idle_skip(struct rq *rq, int cpu)
+{
+	__vxm_rqinfo1(rq, cpu);
+	__vxm_rqinfo2(rq, cpu);
+}
+
+static inline
+void vxm_need_resched(struct task_struct *p, int slice, int cpu)
+{
+	if (slice)
+		return;
+
+	__vxm_task(p, VXM_RESCHED);
+}
+
+static inline
+void vxm_sync(unsigned long now, int cpu)
+{
+	if (!CONFIG_VSERVER_MONITOR_SYNC ||
+		(now % CONFIG_VSERVER_MONITOR_SYNC))
+		return;
+
+	__vxm_sync(cpu);
+}
+
+#define vxm_sched_info(s,v,c)		__vxm_sched(s,v,c)
+
+static inline
+void vxm_tokens_recalc(struct _vx_sched_pc *s, struct rq *rq,
+	struct vx_info *vxi, int cpu)
+{
+	__vxm_sched(s, vxi, cpu);
+	__vxm_rqinfo2(rq, cpu);
+}
+
+static inline
+void vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
+{
+	__vxm_sched(s, vxi, cpu);
+	__vxm_update(s, vxi, cpu);
+	__vxm_update1(s, vxi, cpu);
+	__vxm_update2(s, vxi, cpu);
+}
+
+static inline
+void vxm_rq_max_min(struct rq *rq, int cpu)
+{
+	__vxm_rqinfo1(rq, cpu);
+	__vxm_rqinfo2(rq, cpu);
+}
+
+#else /* CONFIG_VSERVER_MONITOR */
+
+#define vxm_activate_task(t,q)		do { } while (0)
+#define vxm_activate_idle(t,q)		do { } while (0)
+#define vxm_deactivate_task(t,q)	do { } while (0)
+#define vxm_hold_task(t,q)		do { } while (0)
+#define vxm_unhold_task(t,q)		do { } while (0)
+#define vxm_migrate_task(t,q,d)		do { } while (0)
+#define vxm_idle_skip(q,c)		do { } while (0)
+#define vxm_need_resched(t,s,c)		do { } while (0)
+#define vxm_sync(s,c)			do { } while (0)
+#define vxm_sched_info(s,v,c)		do { } while (0)
+#define vxm_tokens_recalc(s,q,v,c)	do { } while (0)
+#define vxm_update_sched(s,v,c)		do { } while (0)
+#define vxm_rq_max_min(q,c)		do { } while (0)
+
+#endif /* CONFIG_VSERVER_MONITOR */
+
--- linux-2.6.18.2/kernel/vserver/monitor.c	1970-01-01 01:00:00 +0100
+++ linux-2.6.18.2-vs2.1.1/kernel/vserver/monitor.c	2006-10-28 19:33:51 +0200
@@ -0,0 +1,142 @@
+/*
+ *  kernel/vserver/monitor.c
+ *
+ *  Virtual Context Scheduler Monitor
+ *
+ *  Copyright (C) 2006 Herbert Pötzl
+ *
+ *  V0.01  basic design
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+
+#include <linux/compat.h>
+#include <linux/vserver/monitor.h>
+#include <linux/vserver/debug_cmd.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+
+#ifdef CONFIG_VSERVER_MONITOR
+#define VXM_SIZE	CONFIG_VSERVER_MONITOR_SIZE
+#else
+#define VXM_SIZE	64
+#endif
+
+struct _vx_monitor {
+	unsigned int counter;
+
+	struct _vx_mon_entry entry[VXM_SIZE+1];
+};
+
+
+DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer);
+
+volatile unsigned int vxm_active = 1;
+
+static atomic_t sequence = ATOMIC_INIT(0);
+
+
+/*
+ * vxm_advance()
+ *
+ * requires disabled preemption
+ */
+
+struct _vx_mon_entry *vxm_advance(int cpu)
+{
+	struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
+	struct _vx_mon_entry *entry;
+	unsigned int index;
+
+	/* while the monitor is inactive, all writes go to the spare tail entry */
+	index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE;
+	entry = &mon->entry[index];
+
+	entry->ev.seq = atomic_inc_return(&sequence);
+	entry->ev.jif = jiffies;
+	return entry;
+}
+
+EXPORT_SYMBOL_GPL(vxm_advance);
+
+
+int do_read_monitor(struct _vx_mon_entry __user *data,
+	int cpu, uint32_t *index, uint32_t *count)
+{
+	int pos, ret = 0;
+	struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
+	int end = mon->counter;
+	int start = end - VXM_SIZE + 2;
+	int idx = *index;
+
+	/* special case: get current pos */
+	if (!*count) {
+		*index = end;
+		return 0;
+	}
+
+	/* have we lost some data? */
+	if (idx < start)
+		idx = start;
+
+	for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
+		struct _vx_mon_entry *entry =
+			&mon->entry[idx % VXM_SIZE];
+
+		/* send entry to userspace */
+		if (copy_to_user(&data[pos], entry, sizeof(*entry))) {
+			ret = -EFAULT;
+			break;
+		}
+	}
+	/* save new index and count; a positive return means more data */
+	*index = idx;
+	*count = pos;
+	return ret ? ret : (*index < end);
+}
+
+int vc_read_monitor(uint32_t id, void __user *data)
+{
+	struct vcmd_read_monitor_v0 vc_data;
+	int ret;
+
+	if (id >= NR_CPUS)
+		return -EINVAL;
+
+	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = do_read_monitor((struct _vx_mon_entry __user *)vc_data.data,
+		id, &vc_data.index, &vc_data.count);
+
+	if (copy_to_user(data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+
+int vc_read_monitor_x32(uint32_t id, void __user *data)
+{
+	struct vcmd_read_monitor_v0_x32 vc_data;
+	int ret;
+
+	if (id >= NR_CPUS)
+		return -EINVAL;
+
+	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = do_read_monitor((struct _vx_mon_entry __user *)
+		compat_ptr(vc_data.data_ptr),
+		id, &vc_data.index, &vc_data.count);
+
+	if (copy_to_user(data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
+	return ret;
+}
+
+#endif /* CONFIG_COMPAT */
+
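
For reference, the sketch below shows how the records emitted by this patch might be decoded once they have been copied out with vc_read_monitor() (the vserver syscall plumbing and the vcmd_read_monitor_v0 layout live in debug_cmd.h, which is not part of this excerpt). It is an illustration, not part of the patch: it assumes a local userspace copy of the new monitor.h, and vxm_print_entry is a hypothetical helper. Two layout details matter: VXM_SCHED is a flag bit and must be tested before switching on the exact type, and the ev.jif/ev.seq fields written by vxm_advance() are overlaid by the payload of sched, rqinfo and update_1/2 records, so jiffies are only meaningful for sync, task and plain update entries.

/* monitor_decode.c -- hypothetical decoder for _vx_mon_entry records */
#include <stdio.h>
#include <stdint.h>
#include "monitor.h"	/* local copy of include/linux/vserver/monitor.h */

static void vxm_print_entry(const struct _vx_mon_entry *e)
{
	/* VXM_SCHED is a flag; the low bits carry the scheduler flags */
	if (e->type & VXM_SCHED) {
		printf("xid=%u sched(%#x) tokens=%u norm=%u idle=%u\n",
		       e->xid, e->type & ~VXM_SCHED,
		       e->sd.tokens, e->sd.norm_time, e->sd.idle_time);
		return;
	}

	switch (e->type) {
	case VXM_SYNC:		/* wall-clock sample from __vxm_sync() */
		printf("@%u sync %u.%09us\n",
		       e->ev.jif, e->ev.sec, e->ev.nsec);
		break;
	case VXM_UPDATE:	/* token count after an update */
		printf("@%u xid=%u update tokens=%u\n",
		       e->ev.jif, e->xid, e->ev.tokens);
		break;
	case VXM_UPDATE_1:	/* fill parameters, interval 0 */
		printf("xid=%u max=%u fill=%u int=%u\n", e->xid,
		       e->u1.tokens_max, e->u1.fill_rate, e->u1.interval);
		break;
	case VXM_UPDATE_2:	/* fill parameters, interval 1 */
		printf("xid=%u min=%u fill=%u int=%u\n", e->xid,
		       e->u2.tokens_min, e->u2.fill_rate, e->u2.interval);
		break;
	case VXM_RQINFO_1:	/* xid holds runqueue address bits 16..31 */
		printf("rq1 run=%u hold=%u iowait=%u uintr=%u idle=%u\n",
		       e->q1.running, e->q1.onhold, e->q1.iowait,
		       e->q1.uintr, e->q1.idle_tokens);
		break;
	case VXM_RQINFO_2:	/* xid holds runqueue address bits 0..15 */
		printf("rq2 norm=%u idle=%u skip=%u\n",
		       e->q2.norm_time, e->q2.idle_time, e->q2.idle_skip);
		break;
	case VXM_ACTIVATE:
	case VXM_DEACTIVATE:
	case VXM_IDLE:
	case VXM_HOLD:
	case VXM_UNHOLD:
	case VXM_MIGRATE:
	case VXM_RESCHED:	/* task events carry pid and state */
		printf("@%u xid=%u task evt=%#x pid=%u state=%u\n",
		       e->ev.jif, e->xid, e->type,
		       e->ev.tsk.pid, e->ev.tsk.state);
		break;
	default:
		printf("type=%#x xid=%u (unknown)\n", e->type, e->xid);
	}
}

A reader loop built on top of this would follow the protocol in do_read_monitor(): first probe the current position by calling with count == 0, then repeatedly pass the returned index back in, treating a positive return value as "more entries pending" and bumping the index forward when records were overwritten.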