diff options
-rw-r--r-- | include/linux/taskstats.h | 4 | ||||
-rw-r--r-- | include/linux/taskstats_kern.h | 27 | ||||
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/taskstats.c | 200 |
4 files changed, 198 insertions, 38 deletions
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index c6aeca32348e..f1cb6cddd19d 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -91,8 +91,6 @@ struct taskstats { }; -#define TASKSTATS_LISTEN_GROUP 0x1 - /* * Commands sent from userspace * Not versioned. New commands should only be inserted at the enum's end @@ -124,6 +122,8 @@ enum { TASKSTATS_CMD_ATTR_UNSPEC = 0, TASKSTATS_CMD_ATTR_PID, TASKSTATS_CMD_ATTR_TGID, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, __TASKSTATS_CMD_ATTR_MAX, }; diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 2b6adec3a2e4..16894b7edcc8 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -11,30 +11,10 @@ #include <linux/sched.h> #include <net/genetlink.h> -enum { - TASKSTATS_MSG_UNICAST, /* send data only to requester */ - TASKSTATS_MSG_MULTICAST, /* send data to a group */ -}; - #ifdef CONFIG_TASKSTATS extern kmem_cache_t *taskstats_cache; extern struct mutex taskstats_exit_mutex; -static inline int taskstats_has_listeners(void) -{ - if (!genl_sock) - return 0; - return netlink_has_listeners(genl_sock, TASKSTATS_LISTEN_GROUP); -} - - -static inline void taskstats_exit_alloc(struct taskstats **ptidstats) -{ - *ptidstats = NULL; - if (taskstats_has_listeners()) - *ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); -} - static inline void taskstats_exit_free(struct taskstats *tidstats) { if (tidstats) @@ -82,17 +62,18 @@ static inline void taskstats_tgid_free(struct signal_struct *sig) kmem_cache_free(taskstats_cache, stats); } -extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int); +extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); +extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int); extern void taskstats_init_early(void); extern void taskstats_tgid_alloc(struct signal_struct *); #else -static inline void taskstats_exit_alloc(struct taskstats **ptidstats) +static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) {} static inline void taskstats_exit_free(struct taskstats *ptidstats) {} static inline void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, - int group_dead) + int group_dead, unsigned int cpu) {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} diff --git a/kernel/exit.c b/kernel/exit.c index 67c1e9a4f812..dba194a8d416 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -847,6 +847,7 @@ fastcall NORET_TYPE void do_exit(long code) struct task_struct *tsk = current; struct taskstats *tidstats; int group_dead; + unsigned int mycpu; profile_task_exit(tsk); @@ -884,7 +885,7 @@ fastcall NORET_TYPE void do_exit(long code) current->comm, current->pid, preempt_count()); - taskstats_exit_alloc(&tidstats); + taskstats_exit_alloc(&tidstats, &mycpu); acct_update_integrals(tsk); if (tsk->mm) { @@ -905,7 +906,7 @@ fastcall NORET_TYPE void do_exit(long code) #endif if (unlikely(tsk->audit_context)) audit_free(tsk); - taskstats_exit_send(tsk, tidstats, group_dead); + taskstats_exit_send(tsk, tidstats, group_dead, mycpu); taskstats_exit_free(tidstats); delayacct_tsk_exit(tsk); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4a0a5022b299..abb59e323544 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -19,9 +19,17 @@ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/delayacct.h> +#include <linux/cpumask.h> +#include <linux/percpu.h> #include <net/genetlink.h> #include <asm/atomic.h> +/* + * Maximum length of a cpumask that can be specified in + * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute + */ +#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) + static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; static int family_registered; kmem_cache_t *taskstats_cache; @@ -37,8 +45,25 @@ static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] __read_mostly = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, + [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, + [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; + +struct listener { + struct list_head list; + pid_t pid; }; +struct listener_list { + struct rw_semaphore sem; + struct list_head list; +}; +static DEFINE_PER_CPU(struct listener_list, listener_array); + +enum actions { + REGISTER, + DEREGISTER, + CPU_DONT_CARE +}; static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, void **replyp, size_t size) @@ -74,25 +99,68 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, return 0; } -static int send_reply(struct sk_buff *skb, pid_t pid, int event) +/* + * Send taskstats data in @skb to listener with nl_pid @pid + */ +static int send_reply(struct sk_buff *skb, pid_t pid) { struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); - void *reply; + void *reply = genlmsg_data(genlhdr); int rc; - reply = genlmsg_data(genlhdr); - rc = genlmsg_end(skb, reply); if (rc < 0) { nlmsg_free(skb); return rc; } - if (event == TASKSTATS_MSG_MULTICAST) - return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP); return genlmsg_unicast(skb, pid); } +/* + * Send taskstats data in @skb to listeners registered for @cpu's exit data + */ +static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) +{ + struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); + struct listener_list *listeners; + struct listener *s, *tmp; + struct sk_buff *skb_next, *skb_cur = skb; + void *reply = genlmsg_data(genlhdr); + int rc, ret; + + rc = genlmsg_end(skb, reply); + if (rc < 0) { + nlmsg_free(skb); + return rc; + } + + rc = 0; + listeners = &per_cpu(listener_array, cpu); + down_write(&listeners->sem); + list_for_each_entry_safe(s, tmp, &listeners->list, list) { + skb_next = NULL; + if (!list_is_last(&s->list, &listeners->list)) { + skb_next = skb_clone(skb_cur, GFP_KERNEL); + if (!skb_next) { + nlmsg_free(skb_cur); + rc = -ENOMEM; + break; + } + } + ret = genlmsg_unicast(skb_cur, s->pid); + if (ret == -ECONNREFUSED) { + list_del(&s->list); + kfree(s); + rc = ret; + } + skb_cur = skb_next; + } + up_write(&listeners->sem); + + return rc; +} + static int fill_pid(pid_t pid, struct task_struct *pidtsk, struct taskstats *stats) { @@ -204,8 +272,73 @@ ret: return; } +static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) +{ + struct listener_list *listeners; + struct listener *s, *tmp; + unsigned int cpu; + cpumask_t mask = *maskp; -static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) + if (!cpus_subset(mask, cpu_possible_map)) + return -EINVAL; + + if (isadd == REGISTER) { + for_each_cpu_mask(cpu, mask) { + s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, + cpu_to_node(cpu)); + if (!s) + goto cleanup; + s->pid = pid; + INIT_LIST_HEAD(&s->list); + + listeners = &per_cpu(listener_array, cpu); + down_write(&listeners->sem); + list_add(&s->list, &listeners->list); + up_write(&listeners->sem); + } + return 0; + } + + /* Deregister or cleanup */ +cleanup: + for_each_cpu_mask(cpu, mask) { + listeners = &per_cpu(listener_array, cpu); + down_write(&listeners->sem); + list_for_each_entry_safe(s, tmp, &listeners->list, list) { + if (s->pid == pid) { + list_del(&s->list); + kfree(s); + break; + } + } + up_write(&listeners->sem); + } + return 0; +} + +static int parse(struct nlattr *na, cpumask_t *mask) +{ + char *data; + int len; + int ret; + + if (na == NULL) + return 1; + len = nla_len(na); + if (len > TASKSTATS_CPUMASK_MAXLEN) + return -E2BIG; + if (len < 1) + return -EINVAL; + data = kmalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + nla_strlcpy(data, na, len); + ret = cpulist_parse(data, *mask); + kfree(data); + return ret; +} + +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; @@ -213,6 +346,19 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) void *reply; size_t size; struct nlattr *na; + cpumask_t mask; + + rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); + if (rc < 0) + return rc; + if (rc == 0) + return add_del_listener(info->snd_pid, &mask, REGISTER); + + rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); + if (rc < 0) + return rc; + if (rc == 0) + return add_del_listener(info->snd_pid, &mask, DEREGISTER); /* * Size includes space for nested attributes @@ -252,7 +398,7 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) nla_nest_end(rep_skb, na); - return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST); + return send_reply(rep_skb, info->snd_pid); nla_put_failure: return genlmsg_cancel(rep_skb, reply); @@ -261,9 +407,35 @@ err: return rc; } +void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) +{ + struct listener_list *listeners; + struct taskstats *tmp; + /* + * This is the cpu on which the task is exiting currently and will + * be the one for which the exit event is sent, even if the cpu + * on which this function is running changes later. + */ + *mycpu = raw_smp_processor_id(); + + *ptidstats = NULL; + tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); + if (!tmp) + return; + + listeners = &per_cpu(listener_array, *mycpu); + down_read(&listeners->sem); + if (!list_empty(&listeners->list)) { + *ptidstats = tmp; + tmp = NULL; + } + up_read(&listeners->sem); + kfree(tmp); +} + /* Send pid data out on exit */ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, - int group_dead) + int group_dead, unsigned int mycpu) { int rc; struct sk_buff *rep_skb; @@ -324,7 +496,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, nla_nest_end(rep_skb, na); send: - send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); + send_cpu_listeners(rep_skb, mycpu); return; nla_put_failure: @@ -338,16 +510,22 @@ ret: static struct genl_ops taskstats_ops = { .cmd = TASKSTATS_CMD_GET, - .doit = taskstats_send_stats, + .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, }; /* Needed early in initialization */ void __init taskstats_init_early(void) { + unsigned int i; + taskstats_cache = kmem_cache_create("taskstats_cache", sizeof(struct taskstats), 0, SLAB_PANIC, NULL, NULL); + for_each_possible_cpu(i) { + INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); + init_rwsem(&(per_cpu(listener_array, i).sem)); + } } static int __init taskstats_init(void) |