/*
 *  linux/kernel/vcontext.c
 *
 *  Virtual Context Support
 *
 *  Copyright (C) 2003  Herbert Pötzl
 *
 *  V0.01  context helper
 *  V0.02  vx_ctx_kill syscall command
 *  V0.03  replaced context_info calls
 *  V0.04  redesign of struct (de)alloc
 *  V0.05  added O(1) scheduler stuff
 *
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/utsname.h>
#include <linux/vcontext.h>	/* struct vx_info, vcmd_* argument structs */
#include <linux/vinline.h>	/* vx_check(), vx_task_id(), get/put_vx_info() */
#include <asm/uaccess.h>


int vc_ctx_kill(uint32_t id, void *data)
{
	int retval, count = 0;
	struct vcmd_ctx_kill_v0 vc_data;
	struct siginfo info;
	struct task_struct *p;
	pid_t initpid = 0;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;
	if (!vx_check(0, VX_ADMIN))
		return -EPERM;

	info.si_signo = vc_data.sig;
	info.si_errno = 0;
	info.si_code = SI_USER;
	info.si_pid = current->pid;
	info.si_uid = current->uid;

	retval = -ESRCH;
	read_lock(&tasklist_lock);
	switch (vc_data.pid) {
	case -1:
	case 0:
		/* signal every thread group leader in the context,
		   optionally sparing the context's init process */
		for_each_task(p) {
			if (!initpid && vx_task_id(p) == id && p->vx_info)
				initpid = p->vx_info->vx_initpid;
			if (vx_task_id(p) == id && p->pid > 1 &&
				(!vc_data.pid || initpid != p->pid) &&
				thread_group_leader(p)) {
				int err = send_sig_info(vc_data.sig, &info, p);

				++count;
				if (err != -EPERM)
					retval = err;
			}
		}
		break;

	default:
		/* signal a single pid, redirected to its thread group leader */
		p = find_task_by_pid(vc_data.pid);
		if (p) {
			if (!thread_group_leader(p)) {
				struct task_struct *tg;

				tg = find_task_by_pid(p->tgid);
				if (tg)
					p = tg;
			}
			if ((id == -1) || (vx_task_id(p) == id))
				retval = send_sig_info(vc_data.sig, &info, p);
		}
		break;
	}
	read_unlock(&tasklist_lock);
	return retval;
}


int vc_get_rlimit(uint32_t id, void *data)
{
	return -ENOSYS;
}

int vc_set_rlimit(uint32_t id, void *data)
{
	return -ENOSYS;
}

int vc_get_rlimit_mask(uint32_t id, void *data)
{
	return -ENOSYS;
}


static struct vx_info *find_vx_info(int);

/*
 *	vc_set_sched - switched syscall to alter a context's scheduling priority
 *
 *	Negative values indicate to leave the value as is
 */
int vc_set_sched(uint32_t ctx, void *data)
{
	struct vcmd_set_sched_v1 vc_data;
	struct vx_info *s;

	if (!vx_check(0, VX_ADMIN))
		return -ENOSYS;
	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	s = find_vx_info(ctx);
	if (!s)
		return -EINVAL;

	spin_lock(&s->sched.tokens_lock);

	if (vc_data.fill_rate > -1)
		s->sched.tokens_fr = vc_data.fill_rate;
	if (vc_data.period > -1)
		s->sched.tokens_div = vc_data.period;
	if (vc_data.fill_level > -1)
		s->sched.tokens = vc_data.fill_level;
	if (vc_data.bucket_size > -1)
		s->sched.tokens_max = vc_data.bucket_size;

	/* sanity check the resulting values */
	if (s->sched.tokens_fr == 0)
		s->sched.tokens_fr = 1;
	if (s->sched.tokens_div == 0)
		s->sched.tokens_div = HZ;	/* arbitrary large number */
	if (s->sched.tokens_max == 0)
		s->sched.tokens_max = 1;
	if (s->sched.tokens > s->sched.tokens_max)
		s->sched.tokens = s->sched.tokens_max;

	if (vc_data.options & TBF_SCHED_ENABLE)
		s->vx_flags |= VX_INFO_SCHED;
	if (vc_data.options & TBF_SCHED_DISABLE)
		s->vx_flags &= ~VX_INFO_SCHED;

	spin_unlock(&s->sched.tokens_lock);
	put_vx_info(s);
	return 0;
}
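/*
 * Token bucket in a nutshell: tokens_fr tokens are added to the bucket
 * every tokens_div jiffies, the fill level is capped at tokens_max, and
 * running tasks of a context consume tokens while they are scheduled.
 * The actual refill is done by the O(1) scheduler hooks, not in this
 * file; the fragment below is only an illustrative sketch of that
 * calculation (vx_refill_tokens() is a hypothetical helper).
 */
#if 0
static inline void vx_refill_tokens(struct vx_info *vxi)
{
	unsigned long delta;

	spin_lock(&vxi->sched.tokens_lock);
	delta = jiffies - vxi->sched.tokens_jfy;
	if (delta >= vxi->sched.tokens_div) {
		/* add tokens_fr tokens for every full tokens_div interval */
		vxi->sched.tokens += (delta / vxi->sched.tokens_div)
			* vxi->sched.tokens_fr;
		if (vxi->sched.tokens > vxi->sched.tokens_max)
			vxi->sched.tokens = vxi->sched.tokens_max;
		vxi->sched.tokens_jfy = jiffies;
	}
	spin_unlock(&vxi->sched.tokens_lock);
}
#endif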
/* system functions */

LIST_HEAD(vx_infos);

spinlock_t vxlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;


/*
 *	struct vx_info allocation and deallocation
 */

static struct vx_info *alloc_vx_info(int id)
{
	struct vx_info *new = NULL;

	vxdprintk("alloc_vx_info(%d)\n", id);
	/* would this benefit from a slab cache? */
	new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
	if (!new)
		return 0;

	memset(new, 0, sizeof(struct vx_info));
	new->vx_id = id;
	/* rest of init goes here */

	/* scheduling; hard code starting values as constants */
	new->sched.tokens_fr = 1;
	new->sched.tokens_div = 4;
	new->sched.tokens = HZ * 5;
	new->sched.tokens_max = HZ * 10;
	new->sched.tokens_jfy = jiffies;
	new->sched.tokens_lock = SPIN_LOCK_UNLOCKED;

	new->virt.nr_threads = 1;
//	new->virt.bias_cswtch = kstat.context_swtch;
	new->virt.bias_jiffies = jiffies;
/*	new->virt.bias_idle = init_tasks[0]->times.tms_utime +
		init_tasks[0]->times.tms_stime;		*/

	down_read(&uts_sem);
	new->virt.utsname = system_utsname;
	up_read(&uts_sem);

	vxdprintk("alloc_vx_info(%d) = %p\n", id, new);
	return new;
}

extern int vx_proc_destroy(struct vx_info *);

void free_vx_info(struct vx_info *vxi)
{
	vxdprintk("free_vx_info(%p)\n", vxi);
	vx_proc_destroy(vxi);
	kfree(vxi);
}


/*
 *	struct vx_info search by id
 *	assumes vxlist_lock is held
 */

static __inline__ struct vx_info *__find_vx_info(int id)
{
	struct vx_info *vxi;

	list_for_each_entry(vxi, &vx_infos, vx_list)
		if (vxi->vx_id == id)
			return vxi;
	return 0;
}

/*
 *	struct vx_info ref stuff
 */

static struct vx_info *find_vx_info(int id)
{
	struct vx_info *vxi;

	spin_lock(&vxlist_lock);
	if ((vxi = __find_vx_info(id)))
		get_vx_info(vxi);
	spin_unlock(&vxlist_lock);
	return vxi;
}

/*
 *	next available dynamic context id
 *	assumes vxlist_lock is held
 */

static __inline__ xid_t __vx_dynamic_id(void)
{
	static xid_t seq = MAX_S_CONTEXT;
	xid_t barrier = seq;

	do {
		if (++seq > MAX_S_CONTEXT)
			seq = MIN_D_CONTEXT;
		if (!__find_vx_info(seq))
			return seq;
	} while (barrier != seq);
	return 0;
}

extern int vx_proc_create(struct vx_info *);

static struct vx_info *find_or_create_vx_info(int id)
{
	struct vx_info *new, *vxi = NULL;

	vxdprintk("find_or_create_vx_info(%d)\n", id);

	if (!(new = alloc_vx_info(id)))
		return 0;

	spin_lock(&vxlist_lock);

	/* dynamic context requested */
	if (id == VX_DYNAMIC_ID) {
		id = __vx_dynamic_id();
		if (!id) {
			printk(KERN_ERR "no dynamic context available.\n");
			goto out_unlock;
		}
		new->vx_id = id;
	}
	/* existing context requested */
	else if ((vxi = __find_vx_info(id))) {
		vxdprintk("find_or_create_vx_info(%d) = %p (found)\n", id, vxi);
		get_vx_info(vxi);
		goto out_unlock;
	}

	/* new context requested */
	vxdprintk("find_or_create_vx_info(%d) = %p (new)\n", id, new);
	atomic_set(&new->vx_refcount, 1);
	list_add(&new->vx_list, &vx_infos);
	vx_proc_create(new);
	vxi = new, new = NULL;

out_unlock:
	spin_unlock(&vxlist_lock);
	if (new)
		free_vx_info(new);
	return vxi;
}


static int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
{
	struct user_struct *new_user, *old_user;

	if (!p || !vxi)
		BUG();
	new_user = alloc_uid(vxi->vx_id, p->uid);
	if (!new_user)
		return -ENOMEM;

	old_user = p->user;
	if (new_user != old_user) {
		atomic_inc(&new_user->processes);
		atomic_dec(&old_user->processes);
		p->user = new_user;
	}
	free_uid(old_user);
	return 0;
}

/*
 *	migrate task to new context
 *	gets vxi, puts old_vxi on change
 */

static int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
{
	struct vx_info *old_vxi;
	int ret = 0;

	if (!p || !vxi)
		BUG();

	vxdprintk("vx_migrate_task(%p,%p[#%d.%d])\n",
		p, vxi, vxi->vx_id, atomic_read(&vxi->vx_refcount));

	spin_lock(&p->alloc_lock);
	if ((old_vxi = p->vx_info) == vxi)
		goto out;

	if (!(ret = vx_migrate_user(p, vxi))) {
		if (old_vxi)
			old_vxi->virt.nr_threads--;
		vxi->virt.nr_threads++;
		p->vx_info = get_vx_info(vxi);
		p->vx_id = vxi->vx_id;
		if (old_vxi)
			put_vx_info(old_vxi);
	}
out:
	spin_unlock(&p->alloc_lock);
	return ret;
}
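/*
 * Reference counting convention used above: find_vx_info() and
 * find_or_create_vx_info() return the context with one reference
 * already taken for the caller, vx_migrate_task() takes its own
 * reference for p->vx_info and drops the old context's reference,
 * so the caller still has to put_vx_info() its lookup reference
 * once it is done (as vc_new_s_context() below does).
 */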
static int vx_set_initpid(struct vx_info *vxi, int pid)
{
	int ret = 0;

	if (vxi->vx_initpid)
		ret = -EPERM;
	else
		vxi->vx_initpid = pid;
	return ret;
}

int vc_new_s_context(uint32_t ctx, void *data)
{
	int ret = -EPERM;
	struct vcmd_new_s_context_v1 vc_data;
	struct vx_info *new_vxi;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* legacy hack, will be removed soon */
	if (ctx == -2) {
		/* assign flags and initpid */
		if (!current->vx_info)
			return -EINVAL;
		ret = 0;
		if (vc_data.flags & VX_INFO_INIT)
			ret = vx_set_initpid(current->vx_info, current->tgid);
		if (ret == 0) {
			/* We keep the same vx_id, but lower the capabilities */
			current->cap_bset &= (~vc_data.remove_cap);
			ret = vx_current_id();
			current->vx_info->vx_flags |= vc_data.flags;
		}
		return ret;
	}

	if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN) ||
		(current->vx_info &&
		(current->vx_info->vx_flags & VX_INFO_LOCK)))
		return -EPERM;

	if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) ||
		(ctx == 0))
		return -EINVAL;

	if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))
		new_vxi = find_or_create_vx_info(ctx);
	else
		new_vxi = find_vx_info(ctx);
	if (!new_vxi)
		return -EINVAL;

	ret = vx_migrate_task(current, new_vxi);
	if (ret == 0) {
		current->cap_bset &= (~vc_data.remove_cap);
		new_vxi->vx_flags |= vc_data.flags;
		if (vc_data.flags & VX_INFO_INIT)
			vx_set_initpid(new_vxi, current->tgid);
		ret = new_vxi->vx_id;
	}
	put_vx_info(new_vxi);
	return ret;
}


LIST_HEAD(ip_infos);

spinlock_t iplist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;


/*
 *	struct ip_info allocation and deallocation
 */

static struct ip_info *alloc_ip_info(void)
{
	struct ip_info *new = NULL;

	vxdprintk("alloc_ip_info()\n");
	/* would this benefit from a slab cache? */
	new = kmalloc(sizeof(struct ip_info), GFP_KERNEL);
	if (!new)
		return 0;

	memset(new, 0, sizeof(struct ip_info));
	/* rest of init goes here */

	vxdprintk("alloc_ip_info() = %p\n", new);
	return new;
}

// extern int ip_proc_destroy(struct ip_info *);

void free_ip_info(struct ip_info *ipi)
{
	vxdprintk("free_ip_info(%p)\n", ipi);
//	ip_proc_destroy(ipi);
	kfree(ipi);
}


static struct ip_info *create_ip_info(void)
{
	struct ip_info *new;

	vxdprintk("create_ip_info()\n");
	if (!(new = alloc_ip_info()))
		return 0;

	spin_lock(&iplist_lock);

	/* new ip info */
	atomic_set(&new->ip_refcount, 1);
	list_add(&new->ip_list, &ip_infos);
//	ip_proc_create(new);

	spin_unlock(&iplist_lock);
	return new;
}


/* set ipv4 root (syscall) */

int vc_set_ipv4root(uint32_t nbip, void *data)
{
	int i, err = -EPERM;
	struct vcmd_set_ipv4root_v3 vc_data;
	struct ip_info *new_ipi, *ipi = current->ip_info;

	if (nbip < 0 || nbip > NB_IPV4ROOT)
		return -EINVAL;
	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	if (!ipi || ipi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
		// We are allowed to change everything
		err = 0;
	else if (ipi) {
		int found = 0;

		// We are allowed to select a subset of the currently
		// installed IP numbers. No new one allowed
		// We can't change the broadcast address though
		for (i=0; i<nbip; i++) {
			int j;
			uint32_t ipip = vc_data.ip_mask_pair[i].ip;

			for (j=0; j<ipi->nbipv4; j++) {
				if (ipip == ipi->ipv4[j]) {
					found++;
					break;
				}
			}
		}
		if ((found == nbip) &&
			(vc_data.broadcast == ipi->v4_bcast))
			err = 0;
	}
	if (err)
		return err;

	new_ipi = create_ip_info();
	if (!new_ipi)
		return -EINVAL;

	new_ipi->nbipv4 = nbip;
	for (i=0; i<nbip; i++) {
		new_ipi->ipv4[i] = vc_data.ip_mask_pair[i].ip;
		new_ipi->mask[i] = vc_data.ip_mask_pair[i].mask;
	}
	new_ipi->v4_bcast = vc_data.broadcast;

	current->ip_info = new_ipi;
	put_ip_info(ipi);
	return 0;
}
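/*
 * Illustrative sketch (hypothetical helper, not part of this file): how
 * code elsewhere could test whether an address belongs to a context's
 * ipv4 root as installed by vc_set_ipv4root() above.  The real checks
 * live in the networking parts of the patch, so this is only meant to
 * show how the ip_info fields are intended to be read.
 */
#if 0
static inline int ip_info_has_addr(struct ip_info *ipi, uint32_t addr)
{
	int i;

	if (!ipi || !ipi->nbipv4)	/* no ipv4 root set: unrestricted */
		return 1;
	for (i = 0; i < ipi->nbipv4; i++)
		if (ipi->ipv4[i] == addr)
			return 1;
	return 0;
}
#endif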