aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/taskstats.h 4
-rw-r--r-- include/linux/taskstats_kern.h 27
-rw-r--r-- kernel/exit.c 5
-rw-r--r-- kernel/taskstats.c 200
4 files changed, 198 insertions, 38 deletions
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index c6aeca32348e..f1cb6cddd19d 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -91,8 +91,6 @@ struct taskstats {
};
-#define TASKSTATS_LISTEN_GROUP 0x1
-
/*
* Commands sent from userspace
* Not versioned. New commands should only be inserted at the enum's end
@@ -124,6 +122,8 @@ enum {
TASKSTATS_CMD_ATTR_UNSPEC = 0,
TASKSTATS_CMD_ATTR_PID,
TASKSTATS_CMD_ATTR_TGID,
+ TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+ TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
__TASKSTATS_CMD_ATTR_MAX,
};
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index 2b6adec3a2e4..16894b7edcc8 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -11,30 +11,10 @@
#include <linux/sched.h>
#include <net/genetlink.h>
-enum {
- TASKSTATS_MSG_UNICAST, /* send data only to requester */
- TASKSTATS_MSG_MULTICAST, /* send data to a group */
-};
-
#ifdef CONFIG_TASKSTATS
extern kmem_cache_t *taskstats_cache;
extern struct mutex taskstats_exit_mutex;
-static inline int taskstats_has_listeners(void)
-{
- if (!genl_sock)
- return 0;
- return netlink_has_listeners(genl_sock, TASKSTATS_LISTEN_GROUP);
-}
-
-
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
-{
- *ptidstats = NULL;
- if (taskstats_has_listeners())
- *ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-}
-
static inline void taskstats_exit_free(struct taskstats *tidstats)
{
if (tidstats)
@@ -82,17 +62,18 @@ static inline void taskstats_tgid_free(struct signal_struct *sig)
kmem_cache_free(taskstats_cache, stats);
}
-extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int);
+extern void taskstats_exit_alloc(struct taskstats **, unsigned int *);
+extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int);
extern void taskstats_init_early(void);
extern void taskstats_tgid_alloc(struct signal_struct *);
#else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
+static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
{}
static inline void taskstats_exit_free(struct taskstats *ptidstats)
{}
static inline void taskstats_exit_send(struct task_struct *tsk,
struct taskstats *tidstats,
- int group_dead)
+ int group_dead, unsigned int cpu)
{}
static inline void taskstats_tgid_init(struct signal_struct *sig)
{}
diff --git a/kernel/exit.c b/kernel/exit.c
index 67c1e9a4f812..dba194a8d416 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -847,6 +847,7 @@ fastcall NORET_TYPE void do_exit(long code)
struct task_struct *tsk = current;
struct taskstats *tidstats;
int group_dead;
+ unsigned int mycpu;
profile_task_exit(tsk);
@@ -884,7 +885,7 @@ fastcall NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
- taskstats_exit_alloc(&tidstats);
+ taskstats_exit_alloc(&tidstats, &mycpu);
acct_update_integrals(tsk);
if (tsk->mm) {
@@ -905,7 +906,7 @@ fastcall NORET_TYPE void do_exit(long code)
#endif
if (unlikely(tsk->audit_context))
audit_free(tsk);
- taskstats_exit_send(tsk, tidstats, group_dead);
+ taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
taskstats_exit_free(tidstats);
delayacct_tsk_exit(tsk);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4a0a5022b299..abb59e323544 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -19,9 +19,17 @@
#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
#include <net/genetlink.h>
#include <asm/atomic.h>
+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
+
static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
static int family_registered;
kmem_cache_t *taskstats_cache;
@@ -37,8 +45,25 @@ static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
__read_mostly = {
[TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+ [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+ [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+
+/* One entry on a cpu's listener list; exit data is unicast to netlink pid @pid */
+struct listener {
+ struct list_head list;
+ pid_t pid;
};
+/*
+ * Listeners registered for one cpu. In the code shown here @sem is held
+ * whenever @list is walked or modified.
+ */
+struct listener_list {
+ struct rw_semaphore sem;
+ struct list_head list;
+};
+/* One listener_list per cpu, indexed with per_cpu(listener_array, cpu) */
+static DEFINE_PER_CPU(struct listener_list, listener_array);
+
+/* Values passed as the isadd argument of add_del_listener() */
+enum actions {
+ REGISTER,
+ DEREGISTER,
+ CPU_DONT_CARE /* NOTE(review): not referenced in the code visible here */
+};
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
void **replyp, size_t size)
@@ -74,25 +99,68 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
return 0;
}
-static int send_reply(struct sk_buff *skb, pid_t pid, int event)
+/*
+ * Send taskstats data in @skb to listener with nl_pid @pid
+ */
+static int send_reply(struct sk_buff *skb, pid_t pid)
{
struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
- void *reply;
+ void *reply = genlmsg_data(genlhdr);
int rc;
- reply = genlmsg_data(genlhdr);
-
rc = genlmsg_end(skb, reply);
if (rc < 0) {
nlmsg_free(skb);
return rc;
}
- if (event == TASKSTATS_MSG_MULTICAST)
- return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP);
return genlmsg_unicast(skb, pid);
}
+/*
+ * Send taskstats data in @skb to listeners registered for @cpu's exit data
+ *
+ * The message is finalized once and then unicast to each listener's netlink
+ * pid; every listener but the last one gets a clone so that the original
+ * buffer can be handed to the final recipient. A listener whose unicast
+ * fails with -ECONNREFUSED is dropped from the list.
+ *
+ * @skb is never left dangling: it is freed here when finalizing fails, when
+ * a clone cannot be allocated, or when no listener is registered any more,
+ * and is otherwise handed to genlmsg_unicast() (which this code relies on
+ * to take ownership of the buffer).
+ *
+ * Returns 0 on success, the genlmsg_end() error if finalizing fails,
+ * -ENOMEM if a clone could not be allocated, or -ECONNREFUSED if at least
+ * one listener had gone away.
+ */
+static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	struct sk_buff *skb_next, *skb_cur = skb;
+	void *reply = genlmsg_data(genlhdr);
+	int rc, ret;
+
+	rc = genlmsg_end(skb, reply);
+	if (rc < 0) {
+		nlmsg_free(skb);
+		return rc;
+	}
+
+	rc = 0;
+	listeners = &per_cpu(listener_array, cpu);
+	down_write(&listeners->sem);
+	if (list_empty(&listeners->list)) {
+		/*
+		 * The list can be empty by the time the semaphore is taken;
+		 * with nobody to send to, the loop below would never run and
+		 * the message would be leaked. Free it here.
+		 */
+		nlmsg_free(skb);
+		goto out;
+	}
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		skb_next = NULL;
+		if (!list_is_last(&s->list, &listeners->list)) {
+			/* More recipients follow: keep a copy for the next one */
+			skb_next = skb_clone(skb_cur, GFP_KERNEL);
+			if (!skb_next) {
+				nlmsg_free(skb_cur);
+				rc = -ENOMEM;
+				break;
+			}
+		}
+		ret = genlmsg_unicast(skb_cur, s->pid);
+		if (ret == -ECONNREFUSED) {
+			/* Listener is gone: stop tracking it */
+			list_del(&s->list);
+			kfree(s);
+			rc = ret;
+		}
+		skb_cur = skb_next;
+	}
+out:
+	up_write(&listeners->sem);
+
+	return rc;
+}
+
static int fill_pid(pid_t pid, struct task_struct *pidtsk,
struct taskstats *stats)
{
@@ -204,8 +272,73 @@ ret:
return;
}
+/*
+ * Register or deregister netlink pid @pid as a listener for exit data on
+ * every cpu set in @maskp.
+ *
+ * @isadd is REGISTER to add one listener entry per cpu; any other value
+ * removes the first entry matching @pid from each cpu's list.
+ *
+ * Returns 0 on success, -EINVAL if @maskp names a cpu outside
+ * cpu_possible_map, or -ENOMEM if an entry could not be allocated while
+ * registering. In the -ENOMEM case the entries for @pid on the cpus in the
+ * mask are removed again before returning.
+ */
+static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+{
+	struct listener_list *listeners;
+	struct listener *s, *tmp;
+	unsigned int cpu;
+	cpumask_t mask = *maskp;
+	int ret = 0;
-static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
+	if (!cpus_subset(mask, cpu_possible_map))
+		return -EINVAL;
+
+	if (isadd == REGISTER) {
+		for_each_cpu_mask(cpu, mask) {
+			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
+					 cpu_to_node(cpu));
+			if (!s) {
+				/*
+				 * Undo the partial registration below, and
+				 * report the failure instead of returning 0.
+				 */
+				ret = -ENOMEM;
+				goto cleanup;
+			}
+			s->pid = pid;
+			INIT_LIST_HEAD(&s->list);
+
+			listeners = &per_cpu(listener_array, cpu);
+			down_write(&listeners->sem);
+			list_add(&s->list, &listeners->list);
+			up_write(&listeners->sem);
+		}
+		return 0;
+	}
+
+	/* Deregister or cleanup */
+cleanup:
+	for_each_cpu_mask(cpu, mask) {
+		listeners = &per_cpu(listener_array, cpu);
+		down_write(&listeners->sem);
+		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+			if (s->pid == pid) {
+				list_del(&s->list);
+				kfree(s);
+				break;
+			}
+		}
+		up_write(&listeners->sem);
+	}
+	return ret;
+}
+
+/*
+ * Convert the cpulist string carried by attribute @na into @mask.
+ *
+ * Returns 1 when the attribute is absent, -EINVAL for an empty payload,
+ * -E2BIG when the payload exceeds TASKSTATS_CPUMASK_MAXLEN, -ENOMEM when
+ * the scratch buffer cannot be allocated, and otherwise whatever
+ * cpulist_parse() returns (the caller treats 0 as success and a negative
+ * value as an error).
+ */
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+	char *buf;
+	int buflen;
+	int err;
+
+	if (!na)
+		return 1;
+
+	buflen = nla_len(na);
+	if (buflen < 1)
+		return -EINVAL;
+	if (buflen > TASKSTATS_CPUMASK_MAXLEN)
+		return -E2BIG;
+
+	/* Work on a private copy of the attribute payload */
+	buf = kmalloc(buflen, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(buf, na, buflen);
+	err = cpulist_parse(buf, *mask);
+	kfree(buf);
+
+	return err;
+}
+
+static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
struct sk_buff *rep_skb;
@@ -213,6 +346,19 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
void *reply;
size_t size;
struct nlattr *na;
+ cpumask_t mask;
+
+ rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+ if (rc < 0)
+ return rc;
+ if (rc == 0)
+ return add_del_listener(info->snd_pid, &mask, REGISTER);
+
+ rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+ if (rc < 0)
+ return rc;
+ if (rc == 0)
+ return add_del_listener(info->snd_pid, &mask, DEREGISTER);
/*
* Size includes space for nested attributes
@@ -252,7 +398,7 @@ static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
nla_nest_end(rep_skb, na);
- return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
+ return send_reply(rep_skb, info->snd_pid);
nla_put_failure:
return genlmsg_cancel(rep_skb, reply);
@@ -261,9 +407,35 @@ err:
return rc;
}
+/*
+ * Allocate the per-task stats buffer for an exiting task, but only keep it
+ * if somebody is listening for exit data on this cpu.
+ *
+ * @ptidstats: set to a zeroed struct taskstats from taskstats_cache when the
+ *             current cpu has at least one registered listener; set to NULL
+ *             otherwise, including when the allocation fails
+ * @mycpu:     set to the cpu the exit is attributed to
+ */
+void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
+{
+	struct listener_list *listeners;
+	struct taskstats *tmp;
+	/*
+	 * This is the cpu on which the task is exiting currently and will
+	 * be the one for which the exit event is sent, even if the cpu
+	 * on which this function is running changes later.
+	 */
+	*mycpu = raw_smp_processor_id();
+
+	*ptidstats = NULL;
+	tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
+	if (!tmp)
+		return;
+
+	listeners = &per_cpu(listener_array, *mycpu);
+	down_read(&listeners->sem);
+	if (!list_empty(&listeners->list)) {
+		/* Hand the buffer to the caller */
+		*ptidstats = tmp;
+		tmp = NULL;
+	}
+	up_read(&listeners->sem);
+
+	/*
+	 * Nobody is listening: give the buffer back. It was allocated from
+	 * taskstats_cache, so it is returned with kmem_cache_free(), the
+	 * same way the rest of the taskstats code frees these objects,
+	 * rather than with kfree().
+	 */
+	if (tmp)
+		kmem_cache_free(taskstats_cache, tmp);
+}
+
/* Send pid data out on exit */
void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
- int group_dead)
+ int group_dead, unsigned int mycpu)
{
int rc;
struct sk_buff *rep_skb;
@@ -324,7 +496,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
nla_nest_end(rep_skb, na);
send:
- send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
+ send_cpu_listeners(rep_skb, mycpu);
return;
nla_put_failure:
@@ -338,16 +510,22 @@ ret:
static struct genl_ops taskstats_ops = {
.cmd = TASKSTATS_CMD_GET,
- .doit = taskstats_send_stats,
+ .doit = taskstats_user_cmd,
.policy = taskstats_cmd_get_policy,
};
/* Needed early in initialization */
void __init taskstats_init_early(void)
{
+ unsigned int i;
+
taskstats_cache = kmem_cache_create("taskstats_cache",
sizeof(struct taskstats),
0, SLAB_PANIC, NULL, NULL);
+ /* Give every possible cpu an empty listener list and an initialized rwsem */
+ for_each_possible_cpu(i) {
+ INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
+ init_rwsem(&(per_cpu(listener_array, i).sem));
+ }
}
static int __init taskstats_init(void)