diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/include/linux/vserver/monitor.h linux-2.6.16-vs2.1.1-rc14.11/include/linux/vserver/monitor.h --- linux-2.6.16-vs2.1.1-rc14.10/include/linux/vserver/monitor.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/include/linux/vserver/monitor.h 2006-04-03 02:10:30 +0200 @@ -0,0 +1,97 @@ +#ifndef _VX_MONITOR_H +#define _VX_MONITOR_H + +#include + + +enum { + VXM_UNUSED = 0, + + VXM_SYNC = 0x10, + + VXM_UPDATE = 0x20, + VXM_UPDATE_1, + VXM_UPDATE_2, + + VXM_RQINFO_1 = 0x24, + VXM_RQINFO_2, + + VXM_ACTIVATE = 0x40, + VXM_DEACTIVATE, + VXM_IDLE, + + VXM_HOLD = 0x44, + VXM_UNHOLD, + + VXM_MIGRATE = 0x48, + VXM_RESCHED, + + /* all other bits are flags */ + VXM_SCHED = 0x80, +}; + +struct _vxm_update_1 { + uint32_t tokens_max; + uint32_t fill_rate; + uint32_t interval; +}; + +struct _vxm_update_2 { + uint32_t tokens_min; + uint32_t fill_rate; + uint32_t interval; +}; + +struct _vxm_rqinfo_1 { + uint16_t running; + uint16_t onhold; + uint16_t iowait; + uint16_t uintr; + uint32_t idle_tokens; +}; + +struct _vxm_rqinfo_2 { + uint32_t norm_time; + uint32_t idle_time; + uint32_t idle_skip; +}; + +struct _vxm_sched { + uint32_t tokens; + uint32_t norm_time; + uint32_t idle_time; +}; + +struct _vxm_task { + uint16_t pid; + uint16_t state; +}; + +struct _vxm_event { + uint32_t jif; + union { + uint32_t seq; + uint32_t sec; + }; + union { + uint32_t tokens; + uint32_t nsec; + struct _vxm_task tsk; + }; +}; + +struct _vx_mon_entry { + uint16_t type; + uint16_t xid; + union { + struct _vxm_event ev; + struct _vxm_sched sd; + struct _vxm_update_1 u1; + struct _vxm_update_2 u2; + struct _vxm_rqinfo_1 q1; + struct _vxm_rqinfo_2 q2; + }; +}; + + +#endif /* _VX_MONITOR_H */ diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/sched.c linux-2.6.16-vs2.1.1-rc14.11/kernel/sched.c --- linux-2.6.16-vs2.1.1-rc14.10/kernel/sched.c 2006-03-20 18:09:05 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/sched.c 2006-04-03 02:10:30 +0200 @@ 
-683,11 +683,15 @@ static int effective_prio(task_t *p) return prio; } +#include "sched_mon.h" + + /* * __activate_task - move a task to the runqueue. */ static inline void __activate_task(task_t *p, runqueue_t *rq) { + vxm_activate_task(p, rq); enqueue_task(p, rq->active); rq->nr_running++; } @@ -697,6 +701,7 @@ static inline void __activate_task(task_ */ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { + vxm_activate_idle(p, rq); enqueue_task_head(p, rq->active); rq->nr_running++; } @@ -825,6 +830,7 @@ static void __deactivate_task(struct tas { rq->nr_running--; dequeue_task(p, p->array); + vxm_deactivate_task(p, rq); p->array = NULL; } @@ -901,6 +907,7 @@ static int migrate_task(task_t *p, int d { runqueue_t *rq = task_rq(p); + vxm_migrate_task(p, rq, dest_cpu); /* * If the task is not on a runqueue (and not running), then * it is sufficient to simply update the task's cpu field. @@ -2616,6 +2623,7 @@ void scheduler_tick(void) unsigned long long now = sched_clock(); update_cpu_clock(p, rq, now); + vxm_sync(now, cpu); rq->timestamp_last_tick = now; @@ -2996,7 +3004,7 @@ pick_next: if (unlikely(!rq->nr_running)) { go_idle: /* can we skip idle time? 
*/ - if (vx_try_skip(rq)) + if (vx_try_skip(rq, cpu)) goto try_unhold; idle_balance(cpu, rq); diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/sched_hard.h linux-2.6.16-vs2.1.1-rc14.11/kernel/sched_hard.h --- linux-2.6.16-vs2.1.1-rc14.10/kernel/sched_hard.h 2006-03-23 20:36:40 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/sched_hard.h 2006-04-03 02:10:30 +0200 @@ -44,11 +44,12 @@ void __vx_save_min_skip(int ret, int *mi } static inline -int vx_try_skip(runqueue_t *rq) +int vx_try_skip(runqueue_t *rq, int cpu) { /* artificially advance time */ if (rq->idle_skip && !list_empty(&rq->hold_queue)) { rq->idle_time += rq->idle_skip; + vxm_idle_skip(rq, cpu); return 1; } return 0; @@ -62,7 +63,7 @@ int vx_try_skip(runqueue_t *rq) #define vx_save_min_skip(ret, min, val) static inline -int vx_try_skip(runqueue_t *rq) +int vx_try_skip(runqueue_t *rq, int cpu) { return 0; } @@ -97,6 +98,7 @@ void vx_hold_task(struct task_struct *p, p->state |= TASK_ONHOLD; /* a new one on hold */ rq->nr_onhold++; + vxm_hold_task(p, rq); list_add_tail(&p->run_list, &rq->hold_queue); } @@ -112,6 +114,7 @@ void vx_unhold_task(struct task_struct * p->state &= ~TASK_ONHOLD; enqueue_task(p, rq->expired); rq->nr_running++; + vxm_unhold_task(p, rq); if (p->static_prio < rq->best_expired_prio) rq->best_expired_prio = p->static_prio; @@ -159,8 +162,9 @@ int vx_need_resched(struct task_struct * /* for tokens > 0, one token was consumed */ if (tokens < 2) - return 1; + slice = 0; } + vxm_need_resched(p, slice, cpu); return (slice == 0); } @@ -199,8 +203,10 @@ void vx_try_unhold(runqueue_t *rq, int c sched_pc = &vx_per_cpu(vxi, sched_pc, cpu); /* recalc tokens */ + vxm_sched_info(sched_pc, vxi, cpu); ret = vx_tokens_recalc(sched_pc, &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, rq, vxi, cpu); if (ret > 0) { /* we found a runable context */ @@ -212,6 +218,7 @@ void vx_try_unhold(runqueue_t *rq, int c } vx_set_rq_max_idle(rq, maxidle); vx_set_rq_min_skip(rq, minskip); + 
vxm_rq_max_min(rq, cpu); } @@ -238,11 +245,14 @@ int vx_schedule(struct task_struct *next /* update scheduler params */ if (cpu_isset(cpu, vxi->sched.update)) { vx_update_sched_param(&vxi->sched, sched_pc); + vxm_update_sched(sched_pc, vxi, cpu); cpu_clear(cpu, vxi->sched.update); } #endif + vxm_sched_info(sched_pc, vxi, cpu); ret = vx_tokens_recalc(sched_pc, &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, rq, vxi, cpu); if (!vx_check_flags(flags , VXF_SCHED_HARD, 0)) return 1; @@ -250,6 +260,7 @@ int vx_schedule(struct task_struct *next if (unlikely(ret < 0)) { vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]); vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]); + vxm_rq_max_min(rq, cpu); put_on_hold: vx_hold_task(next, rq); return 0; @@ -305,8 +316,10 @@ int vx_schedule(struct task_struct *next return 1; sched_pc = &vx_per_cpu(vxi, sched_pc, cpu); + vxm_sched_info(sched_pc, vxi, cpu); ret = vx_tokens_recalc(sched_pc, &rq->norm_time, &rq->idle_time, delta_min); + vxm_tokens_recalc(sched_pc, rq, vxi, cpu); return 1; } diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/sched_mon.h linux-2.6.16-vs2.1.1-rc14.11/kernel/sched_mon.h --- linux-2.6.16-vs2.1.1-rc14.10/kernel/sched_mon.h 1970-01-01 01:00:00 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/sched_mon.h 2006-04-03 02:12:32 +0200 @@ -0,0 +1,189 @@ + +#include <linux/vserver/monitor.h> + +#ifdef CONFIG_VSERVER_MONITOR + +struct _vx_mon_entry *vxm_advance(int cpu); + + +static inline +void __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type) +{ + entry->type = type; + entry->xid = xid; +} + +static inline +void __vxm_sync(int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + struct timespec now = current_kernel_time(); + + __vxm_basic(entry, 0, VXM_SYNC); + entry->ev.sec = now.tv_sec; + entry->ev.nsec = now.tv_nsec; +} + +static inline +void __vxm_task(struct task_struct *p, int type) +{ + struct _vx_mon_entry *entry = vxm_advance(task_cpu(p)); + + __vxm_basic(entry, p->xid, type); + 
entry->ev.tsk.pid = p->pid; + entry->ev.tsk.state = p->state; +} + +static inline +void __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags)); + entry->sd.tokens = s->tokens; + entry->sd.norm_time = s->norm_time; + entry->sd.idle_time = s->idle_time; +} + +static inline +void __vxm_rqinfo1(runqueue_t *q, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + entry->type = VXM_RQINFO_1; + entry->xid = ((unsigned)(long)q >> 16) & 0xffff; + entry->q1.running = q->nr_running; + entry->q1.onhold = q->nr_onhold; + entry->q1.iowait = atomic_read(&q->nr_iowait); + entry->q1.uintr = q->nr_uninterruptible; + entry->q1.idle_tokens = q->idle_tokens; +} + +static inline +void __vxm_rqinfo2(runqueue_t *q, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + entry->type = VXM_RQINFO_2; + entry->xid = (unsigned)(long)q & 0xffff; + entry->q2.norm_time = q->norm_time; + entry->q2.idle_time = q->idle_time; + entry->q2.idle_skip = q->idle_skip; +} + +static inline +void __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE); + entry->ev.tokens = s->tokens; +} + +static inline +void __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1); + entry->u1.tokens_max = s->tokens_max; + entry->u1.fill_rate = s->fill_rate[0]; + entry->u1.interval = s->interval[0]; +} + +static inline +void __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + struct _vx_mon_entry *entry = vxm_advance(cpu); + + __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2); + entry->u2.tokens_min = s->tokens_min; + entry->u2.fill_rate = s->fill_rate[1]; + entry->u2.interval = s->interval[1]; +} + + +#define vxm_activate_task(p,q) __vxm_task(p, 
VXM_ACTIVATE) +#define vxm_activate_idle(p,q) __vxm_task(p, VXM_IDLE) +#define vxm_deactivate_task(p,q) __vxm_task(p, VXM_DEACTIVATE) +#define vxm_hold_task(p,q) __vxm_task(p, VXM_HOLD) +#define vxm_unhold_task(p,q) __vxm_task(p, VXM_UNHOLD) + +static inline +void vxm_migrate_task(struct task_struct *p, runqueue_t *rq, int dest) +{ + __vxm_task(p, VXM_MIGRATE); + __vxm_rqinfo1(rq, task_cpu(p)); + __vxm_rqinfo2(rq, task_cpu(p)); +} + +static inline +void vxm_idle_skip(runqueue_t *rq, int cpu) +{ + __vxm_rqinfo1(rq, cpu); + __vxm_rqinfo2(rq, cpu); +} + +static inline +void vxm_need_resched(struct task_struct *p, int slice, int cpu) +{ + if (slice) + return; + + __vxm_task(p, VXM_RESCHED); +} + +static inline +void vxm_sync(unsigned long now, int cpu) +{ + if (!CONFIG_VSERVER_MONITOR_SYNC || + (now % CONFIG_VSERVER_MONITOR_SYNC)) + return; + + __vxm_sync(cpu); +} + +#define vxm_sched_info(s,v,c) __vxm_sched(s,v,c) + +static inline +void vxm_tokens_recalc(struct _vx_sched_pc *s, runqueue_t *rq, + struct vx_info *vxi, int cpu) +{ + __vxm_sched(s, vxi, cpu); + __vxm_rqinfo2(rq, cpu); +} + +static inline +void vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu) +{ + __vxm_sched(s, vxi, cpu); + __vxm_update(s, vxi, cpu); + __vxm_update1(s, vxi, cpu); + __vxm_update2(s, vxi, cpu); +} + +static inline +void vxm_rq_max_min(runqueue_t *rq, int cpu) +{ + __vxm_rqinfo1(rq, cpu); + __vxm_rqinfo2(rq, cpu); +} + +#else /* CONFIG_VSERVER_MONITOR */ + +#define vxm_activate_task(t,q) do { } while (0) +#define vxm_activate_idle(t,q) do { } while (0) +#define vxm_deactivate_task(t,q) do { } while (0) +#define vxm_hold_task(t,q) do { } while (0) +#define vxm_unhold_task(t,q) do { } while (0) +#define vxm_migrate_task(t,q,d) do { } while (0) +#define vxm_idle_skip(q,c) do { } while (0) +#define vxm_need_resched(t,s,c) do { } while (0) +#define vxm_sync(n,c) do { } while (0) +#define vxm_sched_info(s,v,c) do { } while (0) +#define vxm_tokens_recalc(s,q,v,c) do { } 
while (0) +#define vxm_update_sched(s,v,c) do { } while (0) +#define vxm_rq_max_min(q,c) do { } while (0) + +#endif /* CONFIG_VSERVER_MONITOR */ + diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/Kconfig linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/Kconfig --- linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/Kconfig 2006-03-23 23:10:54 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/Kconfig 2006-04-03 02:10:30 +0200 @@ -199,6 +199,33 @@ config VSERVER_HISTORY_SIZE This allows you to specify the number of entries in the per-CPU history buffer. +config VSERVER_MONITOR + bool "VServer Scheduling Monitor" + depends on VSERVER_DEBUG + default n + help + Set this to yes if you want to record the scheduling + decisions, so that they can be relayed to userspace + for detailed analysis. + +config VSERVER_MONITOR_SIZE + int "Per-CPU Monitor Queue Size (32-65536)" + depends on VSERVER_MONITOR + range 32 65536 + default 1024 + help + This allows you to specify the number of entries in + the per-CPU scheduling monitor buffer. + +config VSERVER_MONITOR_SYNC + int "Per-CPU Monitor Sync Interval (0-65536)" + depends on VSERVER_MONITOR + range 0 65536 + default 256 + help + This allows you to specify the interval in ticks + when a time sync entry is inserted. 
+ endmenu diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/Makefile linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/Makefile --- linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/Makefile 2006-03-20 18:09:05 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/Makefile 2006-04-03 02:10:30 +0200 @@ -13,4 +13,5 @@ vserver-$(CONFIG_VSERVER_DEBUG) += sysct vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o vserver-$(CONFIG_VSERVER_HISTORY) += history.o +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o diff -NurpP --minimal linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/monitor.c linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/monitor.c --- linux-2.6.16-vs2.1.1-rc14.10/kernel/vserver/monitor.c 1970-01-01 01:00:00 +0100 +++ linux-2.6.16-vs2.1.1-rc14.11/kernel/vserver/monitor.c 2006-04-03 02:10:30 +0200 @@ -0,0 +1,64 @@ +/* + * kernel/vserver/monitor.c + * + * Virtual Context Scheduler Monitor + * + * Copyright (C) 2006 Herbert Pötzl + * + * V0.01 basic design + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +#ifdef CONFIG_VSERVER_MONITOR +#define VXM_SIZE CONFIG_VSERVER_MONITOR_SIZE +#else +#define VXM_SIZE 64 +#endif + +struct _vx_monitor { + unsigned int counter; + + struct _vx_mon_entry entry[VXM_SIZE+1]; +}; + + +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer); + +unsigned volatile int vxm_active = 1; + +static atomic_t sequence = ATOMIC_INIT(0); + + +/* vxm_advance() + + * requires disabled preemption */ + +struct _vx_mon_entry *vxm_advance(int cpu) +{ + struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu); + struct _vx_mon_entry *entry; + unsigned int index; + + index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE; + entry = &mon->entry[index]; + + entry->ev.seq = atomic_inc_return(&sequence); + entry->ev.jif = jiffies; + return entry; +} + +EXPORT_SYMBOL_GPL(vxm_advance); +