diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/audit.c | 532 | ||||
-rw-r--r-- | kernel/audit_fsnotify.c | 5 | ||||
-rw-r--r-- | kernel/audit_tree.c | 3 | ||||
-rw-r--r-- | kernel/audit_watch.c | 5 | ||||
-rw-r--r-- | kernel/auditfilter.c | 5 | ||||
-rw-r--r-- | kernel/auditsc.c | 12 | ||||
-rw-r--r-- | kernel/capability.c | 36 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/padata.c | 4 | ||||
-rw-r--r-- | kernel/ptrace.c | 70 | ||||
-rw-r--r-- | kernel/seccomp.c | 7 |
12 files changed, 433 insertions, 257 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 314e7d62f5f0..12c679f769c6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -116,8 +116,6 @@ obj-$(CONFIG_HAS_IOMEM) += memremap.o $(obj)/configs.o: $(obj)/config_data.h -# config_data.h contains the same information as ikconfig.h but gzipped. -# Info from config_data can be extracted from /proc/config* targets += config_data.gz $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip) diff --git a/kernel/audit.c b/kernel/audit.c index 67b9fbd871be..91bff3c0b368 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -107,7 +107,6 @@ static u32 audit_rate_limit; * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) -static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; /* The identity of the user shutting down the audit system. */ @@ -138,11 +137,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); -static struct sk_buff_head audit_skb_queue; -/* queue of skbs to send to auditd when/if it comes back */ -static struct sk_buff_head audit_skb_hold_queue; +/* queue msgs to send via kauditd_task */ +static struct sk_buff_head audit_queue; +/* queue msgs due to temporary unicast send problems */ +static struct sk_buff_head audit_retry_queue; +/* queue msgs waiting for new auditd connection */ +static struct sk_buff_head audit_hold_queue; + +/* queue servicing thread */ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); + +/* waitqueue for callers who are blocked on the audit backlog */ static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, @@ -338,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit) static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", - &audit_backlog_wait_time_master, timeout); + &audit_backlog_wait_time, timeout); } static int audit_set_enabled(u32 state) @@ -365,29 +371,10 @@ static int audit_set_failure(u32 state) } /* - * Queue skbs to be sent to auditd when/if it comes back. These skbs should - * already have been sent via prink/syslog and so if these messages are dropped - * it is not a huge concern since we already passed the audit_log_lost() - * notification and stuff. This is just nice to get audit messages during - * boot before auditd is running or messages generated while auditd is stopped. - * This only holds messages is audit_default is set, aka booting with audit=1 - * or building your kernel that way. - */ -static void audit_hold_skb(struct sk_buff *skb) -{ - if (audit_default && - (!audit_backlog_limit || - skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit)) - skb_queue_tail(&audit_skb_hold_queue, skb); - else - kfree_skb(skb); -} - -/* * For one reason or another this nlh isn't getting delivered to the userspace * audit daemon, just send it to printk. */ -static void audit_printk_skb(struct sk_buff *skb) +static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); @@ -398,58 +385,123 @@ static void audit_printk_skb(struct sk_buff *skb) else audit_log_lost("printk limit exceeded"); } +} + +/** + * kauditd_hold_skb - Queue an audit record, waiting for auditd + * @skb: audit record + * + * Description: + * Queue the audit record, waiting for an instance of auditd. When this + * function is called we haven't given up yet on sending the record, but things + * are not looking good. The first thing we want to do is try to write the + * record via printk and then see if we want to try and hold on to the record + * and queue it, if we have room. If we want to hold on to the record, but we + * don't have room, record a record lost message. + */ +static void kauditd_hold_skb(struct sk_buff *skb) +{ + /* at this point it is uncertain if we will ever send this to auditd so + * try to send the message via printk before we go any further */ + kauditd_printk_skb(skb); + + /* can we just silently drop the message? */ + if (!audit_default) { + kfree_skb(skb); + return; + } + + /* if we have room, queue the message */ + if (!audit_backlog_limit || + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_hold_queue, skb); + return; + } - audit_hold_skb(skb); + /* we have no other options - drop the message */ + audit_log_lost("kauditd hold queue overflow"); + kfree_skb(skb); } -static void kauditd_send_skb(struct sk_buff *skb) +/** + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd + * @skb: audit record + * + * Description: + * Not as serious as kauditd_hold_skb() as we still have a connected auditd, + * but for some reason we are having problems sending it audit records so + * queue the given record and attempt to resend. + */ +static void kauditd_retry_skb(struct sk_buff *skb) { - int err; - int attempts = 0; -#define AUDITD_RETRIES 5 + /* NOTE: because records should only live in the retry queue for a + * short period of time, before either being sent or moved to the hold + * queue, we don't currently enforce a limit on this queue */ + skb_queue_tail(&audit_retry_queue, skb); +} + +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the records in the retry + * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex + * must be held when calling this function. + */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* break the connection */ + if (audit_sock) { + sock_put(audit_sock); + audit_sock = NULL; + } + audit_pid = 0; + audit_nlk_portid = 0; + + /* flush all of the retry queue to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); +} + +/** + * kauditd_send_unicast_skb - Send a record via unicast to auditd + * @skb: audit record + */ +static int kauditd_send_unicast_skb(struct sk_buff *skb) +{ + int rc; -restart: - /* take a reference in case we can't send it and we want to hold it */ + /* if we know nothing is connected, don't even try the netlink call */ + if (!audit_pid) + return -ECONNREFUSED; + + /* get an extra skb reference in case we fail to send */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); - if (err < 0) { - pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n", - audit_pid, err); - if (audit_pid) { - if (err == -ECONNREFUSED || err == -EPERM - || ++attempts >= AUDITD_RETRIES) { - char s[32]; - - snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid); - audit_log_lost(s); - audit_pid = 0; - audit_sock = NULL; - } else { - pr_warn("re-scheduling(#%d) write to audit_pid=%d\n", - attempts, audit_pid); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - goto restart; - } - } - /* we might get lucky and get this in the next auditd */ - audit_hold_skb(skb); - } else - /* drop the extra reference if sent ok */ + rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); + if (rc >= 0) { consume_skb(skb); + rc = 0; + } + + return rc; } /* - * kauditd_send_multicast_skb - send the skb to multicast userspace listeners + * kauditd_send_multicast_skb - Send a record to any multicast listeners + * @skb: audit record * + * Description: * This function doesn't consume an skb as might be expected since it has to * copy it anyways. */ -static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) +static void kauditd_send_multicast_skb(struct sk_buff *skb) { - struct sk_buff *copy; - struct audit_net *aunet = net_generic(&init_net, audit_net_id); - struct sock *sock = aunet->nlsk; + struct sk_buff *copy; + struct audit_net *aunet = net_generic(&init_net, audit_net_id); + struct sock *sock = aunet->nlsk; + struct nlmsghdr *nlh; if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; @@ -464,74 +516,161 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) * no reason for new multicast clients to continue with this * non-compliance. */ - copy = skb_copy(skb, gfp_mask); + copy = skb_copy(skb, GFP_KERNEL); if (!copy) return; + nlh = nlmsg_hdr(copy); + nlh->nlmsg_len = skb->len; - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); } -/* - * flush_hold_queue - empty the hold queue if auditd appears - * - * If auditd just started, drain the queue of messages already - * sent to syslog/printk. Remember loss here is ok. We already - * called audit_log_lost() if it didn't go out normally. so the - * race between the skb_dequeue and the next check for audit_pid - * doesn't matter. +/** + * kauditd_wake_condition - Return true when it is time to wake kauditd_thread * - * If you ever find kauditd to be too slow we can get a perf win - * by doing our own locking and keeping better track if there - * are messages in this queue. I don't see the need now, but - * in 5 years when I want to play with this again I'll see this - * note and still have no friggin idea what i'm thinking today. + * Description: + * This function is for use by the wait_event_freezable() call in + * kauditd_thread(). */ -static void flush_hold_queue(void) +static int kauditd_wake_condition(void) { - struct sk_buff *skb; - - if (!audit_default || !audit_pid) - return; - - skb = skb_dequeue(&audit_skb_hold_queue); - if (likely(!skb)) - return; + static int pid_last = 0; + int rc; + int pid = audit_pid; - while (skb && audit_pid) { - kauditd_send_skb(skb); - skb = skb_dequeue(&audit_skb_hold_queue); - } + /* wake on new messages or a change in the connected auditd */ + rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last); + if (rc) + pid_last = pid; - /* - * if auditd just disappeared but we - * dequeued an skb we need to drop ref - */ - consume_skb(skb); + return rc; } static int kauditd_thread(void *dummy) { + int rc; + int auditd = 0; + int reschedule = 0; + struct sk_buff *skb; + struct nlmsghdr *nlh; + +#define UNICAST_RETRIES 5 +#define AUDITD_BAD(x,y) \ + ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES) + + /* NOTE: we do invalidate the auditd connection flag on any sending + * errors, but we only "restore" the connection flag at specific places + * in the loop in order to help ensure proper ordering of audit + * records */ + set_freezable(); while (!kthread_should_stop()) { - struct sk_buff *skb; - - flush_hold_queue(); + /* NOTE: possible area for future improvement is to look at + * the hold and retry queues, since only this thread + * has access to these queues we might be able to do + * our own queuing and skip some/all of the locking */ + + /* NOTE: it might be a fun experiment to split the hold and + * retry queue handling to another thread, but the + * synchronization issues and other overhead might kill + * any performance gains */ + + /* attempt to flush the hold queue */ + while (auditd && (skb = skb_dequeue(&audit_hold_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + /* requeue to the same spot */ + skb_queue_head(&audit_hold_queue, skb); + + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); + auditd_reset(); + mutex_unlock(&audit_cmd_mutex); + reschedule = 0; + } + } else + /* we were able to send successfully */ + reschedule = 0; + } - skb = skb_dequeue(&audit_skb_queue); + /* attempt to flush the retry queue */ + while (auditd && (skb = skb_dequeue(&audit_retry_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + kauditd_hold_skb(skb); + mutex_lock(&audit_cmd_mutex); + auditd_reset(); + mutex_unlock(&audit_cmd_mutex); + reschedule = 0; + } else + /* temporary problem (we hope), queue + * to the same spot and retry */ + skb_queue_head(&audit_retry_queue, skb); + } else + /* we were able to send successfully */ + reschedule = 0; + } + /* standard queue processing, try to be as quick as possible */ +quick_loop: + skb = skb_dequeue(&audit_queue); if (skb) { - if (!audit_backlog_limit || - (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit)) - wake_up(&audit_backlog_wait); - if (audit_pid) - kauditd_send_skb(skb); + /* setup the netlink header, see the comments in + * kauditd_send_multicast_skb() for length quirks */ + nlh = nlmsg_hdr(skb); + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; + + /* attempt to send to any multicast listeners */ + kauditd_send_multicast_skb(skb); + + /* attempt to send to auditd, queue on failure */ + if (auditd) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); + auditd_reset(); + mutex_unlock(&audit_cmd_mutex); + reschedule = 0; + } + + /* move to the retry queue */ + kauditd_retry_skb(skb); + } else + /* everything is working so go fast! */ + goto quick_loop; + } else if (reschedule) + /* we are currently having problems, move to + * the retry queue */ + kauditd_retry_skb(skb); else - audit_printk_skb(skb); - continue; - } + /* dump the message via printk and hold it */ + kauditd_hold_skb(skb); + } else { + /* we have flushed the backlog so wake everyone */ + wake_up(&audit_backlog_wait); + + /* if everything is okay with auditd (if present), go + * to sleep until there is something new in the queue + * or we have a change in the connected auditd; + * otherwise simply reschedule to give things a chance + * to recover */ + if (reschedule) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else + wait_event_freezable(kauditd_wait, + kauditd_wake_condition()); - wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue)); + /* update the auditd connection status */ + auditd = (audit_pid ? 1 : 0); + } } + return 0; } @@ -596,6 +735,7 @@ static int audit_send_reply_thread(void *arg) kfree(reply); return 0; } + /** * audit_send_reply - send an audit reply message via netlink * @request_skb: skb of request we are replying to (used to target the reply) @@ -832,16 +972,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) return err; - /* As soon as there's any sign of userspace auditd, - * start kauditd to talk to it */ - if (!kauditd_task) { - kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) { - err = PTR_ERR(kauditd_task); - kauditd_task = NULL; - return err; - } - } seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); @@ -855,9 +985,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); - s.backlog = skb_queue_len(&audit_skb_queue); + s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; - s.backlog_wait_time = audit_backlog_wait_time_master; + s.backlog_wait_time = audit_backlog_wait_time; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -897,9 +1027,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, audit_pid, 1); - audit_pid = new_pid; - audit_nlk_portid = NETLINK_CB(skb).portid; - audit_sock = skb->sk; + if (new_pid) { + if (audit_sock) + sock_put(audit_sock); + audit_pid = new_pid; + audit_nlk_portid = NETLINK_CB(skb).portid; + sock_hold(skb->sk); + audit_sock = skb->sk; + } else { + auditd_reset(); + } + wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); @@ -1167,10 +1305,10 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); struct sock *sock = aunet->nlsk; - if (sock == audit_sock) { - audit_pid = 0; - audit_sock = NULL; - } + mutex_lock(&audit_cmd_mutex); + if (sock == audit_sock) + auditd_reset(); + mutex_unlock(&audit_cmd_mutex); netlink_kernel_release(sock); aunet->nlsk = NULL; @@ -1195,17 +1333,24 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); - skb_queue_head_init(&audit_skb_queue); - skb_queue_head_init(&audit_skb_hold_queue); + skb_queue_head_init(&audit_queue); + skb_queue_head_init(&audit_retry_queue); + skb_queue_head_init(&audit_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; - audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); - for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); + if (IS_ERR(kauditd_task)) { + int err = PTR_ERR(kauditd_task); + panic("audit: failed to start the kauditd thread (%d)\n", err); + } + + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); + return 0; } __initcall(audit_init); @@ -1338,24 +1483,6 @@ static inline void audit_get_stamp(struct audit_context *ctx, } } -/* - * Wait for auditd to drain the queue a little - */ -static long wait_for_auditd(long sleep_time) -{ - DECLARE_WAITQUEUE(wait, current); - - if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { - add_wait_queue_exclusive(&audit_backlog_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - sleep_time = schedule_timeout(sleep_time); - remove_wait_queue(&audit_backlog_wait, &wait); - } - - return sleep_time; -} - /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) @@ -1374,12 +1501,9 @@ static long wait_for_auditd(long sleep_time) struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { - struct audit_buffer *ab = NULL; - struct timespec t; - unsigned int uninitialized_var(serial); - int reserve = 5; /* Allow atomic callers to go up to five - entries over the normal backlog limit */ - unsigned long timeout_start = jiffies; + struct audit_buffer *ab; + struct timespec t; + unsigned int uninitialized_var(serial); if (audit_initialized != AUDIT_INITIALIZED) return NULL; @@ -1387,38 +1511,48 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - if (gfp_mask & __GFP_DIRECT_RECLAIM) { - if (audit_pid && audit_pid == current->tgid) - gfp_mask &= ~__GFP_DIRECT_RECLAIM; - else - reserve = 0; - } - - while (audit_backlog_limit - && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { - if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { - long sleep_time; + /* don't ever fail/sleep on these two conditions: + * 1. auditd generated record - since we need auditd to drain the + * queue; also, when we are checking for auditd, compare PIDs using + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() + * using a PID anchored in the caller's namespace + * 2. audit command message - record types 1000 through 1099 inclusive + * are command messages/records used to manage the kernel subsystem + * and the audit userspace, blocking on these messages could cause + * problems under load so don't do it (note: not all of these + * command types are valid as record types, but it is quicker to + * just check two ints than a series of ints in a if/switch stmt) */ + if (!((audit_pid && audit_pid == task_tgid_vnr(current)) || + (type >= 1000 && type <= 1099))) { + long sleep_time = audit_backlog_wait_time; + + while (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); - sleep_time = timeout_start + audit_backlog_wait_time - jiffies; - if (sleep_time > 0) { - sleep_time = wait_for_auditd(sleep_time); - if (sleep_time > 0) - continue; + /* sleep if we are allowed and we haven't exhausted our + * backlog wait limit */ + if ((gfp_mask & __GFP_DIRECT_RECLAIM) && + (sleep_time > 0)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&audit_backlog_wait, + &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + sleep_time = schedule_timeout(sleep_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } else { + if (audit_rate_check() && printk_ratelimit()) + pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", + skb_queue_len(&audit_queue), + audit_backlog_limit); + audit_log_lost("backlog limit exceeded"); + return NULL; } } - if (audit_rate_check() && printk_ratelimit()) - pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", - skb_queue_len(&audit_skb_queue), - audit_backlog_limit); - audit_log_lost("backlog limit exceeded"); - audit_backlog_wait_time = 0; - wake_up(&audit_backlog_wait); - return NULL; } - if (!reserve && !audit_backlog_wait_time) - audit_backlog_wait_time = audit_backlog_wait_time_master; - ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); @@ -1426,9 +1560,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); - audit_log_format(ab, "audit(%lu.%03lu:%u): ", t.tv_sec, t.tv_nsec/1000000, serial); + return ab; } @@ -1978,10 +2112,10 @@ out: * audit_log_end - end one audit record * @ab: the audit_buffer * - * netlink_unicast() cannot be called inside an irq context because it blocks - * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed - * on a queue and a tasklet is scheduled to remove them from the queue outside - * the irq context. May be called in any context. + * We can not do a netlink send inside an irq context because it blocks (last + * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a + * queue and a tasklet is scheduled to remove them from the queue outside the + * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { @@ -1990,28 +2124,8 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - - nlh->nlmsg_len = ab->skb->len; - kauditd_send_multicast_skb(ab->skb, ab->gfp_mask); - - /* - * The original kaudit unicast socket sends up messages with - * nlmsg_len set to the payload length rather than the entire - * message length. This breaks the standard set by netlink. - * The existing auditd daemon assumes this breakage. Fixing - * this would require co-ordinating a change in the established - * protocol between the kaudit kernel subsystem and the auditd - * userspace code. - */ - nlh->nlmsg_len -= NLMSG_HDRLEN; - - if (audit_pid) { - skb_queue_tail(&audit_skb_queue, ab->skb); - wake_up_interruptible(&kauditd_wait); - } else { - audit_printk_skb(ab->skb); - } + skb_queue_tail(&audit_queue, ab->skb); + wake_up_interruptible(&kauditd_wait); ab->skb = NULL; } audit_buffer_free(ab); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index f84f8d06e1f6..f75154889aa9 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -130,10 +130,9 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "auid=%u ses=%u op=", + audit_log_format(ab, "auid=%u ses=%u op=%s", from_kuid(&init_user_ns, audit_get_loginuid(current)), - audit_get_sessionid(current)); - audit_log_string(ab, op); + audit_get_sessionid(current), op); audit_log_format(ab, " path="); audit_log_untrustedstring(ab, audit_mark->path); audit_log_key(ab, rule->filterkey); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 25772476fa4a..055f11b0a50f 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -458,8 +458,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule) ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "op="); - audit_log_string(ab, "remove_rule"); + audit_log_format(ab, "op=remove_rule"); audit_log_format(ab, " dir="); audit_log_untrustedstring(ab, rule->tree->pathname); audit_log_key(ab, rule->filterkey); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0d302a87f21b..686e068ec3da 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -242,10 +242,9 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "auid=%u ses=%u op=", + audit_log_format(ab, "auid=%u ses=%u op=%s", from_kuid(&init_user_ns, audit_get_loginuid(current)), - audit_get_sessionid(current)); - audit_log_string(ab, op); + audit_get_sessionid(current), op); audit_log_format(ab, " path="); audit_log_untrustedstring(ab, w->path); audit_log_key(ab, r->filterkey); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 85d9cac497e4..880519d6cf2a 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -363,6 +363,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) case AUDIT_EXIT: case AUDIT_SUCCESS: case AUDIT_INODE: + case AUDIT_SESSIONID: /* bit ops are only useful on syscall args */ if (f->op == Audit_bitmask || f->op == Audit_bittest) return -EINVAL; @@ -476,6 +477,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if (!gid_valid(f->gid)) goto exit_free; break; + case AUDIT_SESSIONID: case AUDIT_ARCH: entry->rule.arch_f = f; break; @@ -1074,8 +1076,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re return; audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid); audit_log_task_context(ab); - audit_log_format(ab, " op="); - audit_log_string(ab, action); + audit_log_format(ab, " op=%s", action); audit_log_key(ab, rule->filterkey); audit_log_format(ab, " list=%d res=%d", rule->listnr, res); audit_log_end(ab); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2cd5256dbff7..cf1fa43512c1 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -446,6 +446,7 @@ static int audit_filter_rules(struct task_struct *tsk, const struct cred *cred; int i, need_sid = 1; u32 sid; + unsigned int sessionid; cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); @@ -508,6 +509,10 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_FSGID: result = audit_gid_comparator(cred->fsgid, f->op, f->gid); break; + case AUDIT_SESSIONID: + sessionid = audit_get_sessionid(current); + result = audit_comparator(sessionid, f->op, f->val); + break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); break; @@ -1000,7 +1005,7 @@ static void audit_log_execve_info(struct audit_context *context, long len_rem; long len_full; long len_buf; - long len_abuf; + long len_abuf = 0; long len_tmp; bool require_data; bool encode; @@ -2025,8 +2030,11 @@ int audit_set_loginuid(kuid_t loginuid) goto out; /* are we setting or clearing? */ - if (uid_valid(loginuid)) + if (uid_valid(loginuid)) { sessionid = (unsigned int)atomic_inc_return(&session_id); + if (unlikely(sessionid == (unsigned int)-1)) + sessionid = (unsigned int)atomic_inc_return(&session_id); + } task->sessionid = sessionid; task->loginuid = loginuid; diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac5..4984e1f552eb 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, EXPORT_SYMBOL(file_ns_capable); /** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. + */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + +/** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question * @cap: The capability in question @@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/fork.c b/kernel/fork.c index a439ac429669..869b8ccc00bf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -747,7 +747,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -787,6 +788,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -832,7 +834,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -847,6 +849,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1128,7 +1131,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/padata.c b/kernel/padata.c index 7848f0566403..05316c9f32da 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -64,15 +64,11 @@ static int padata_cpu_hash(struct parallel_data *pd) static void padata_parallel_worker(struct work_struct *parallel_work) { struct padata_parallel_queue *pqueue; - struct parallel_data *pd; - struct padata_instance *pinst; LIST_HEAD(local_list); local_bh_disable(); pqueue = container_of(parallel_work, struct padata_parallel_queue, work); - pd = pqueue->pd; - pinst = pd->pinst; spin_lock(&pqueue->parallel.lock); list_replace_init(&pqueue->parallel.list, &local_list); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6474f7272ec..49ba7c1ade9d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,6 +27,35 @@ #include <linux/cn_proc.h> #include <linux/compat.h> +/* + * Access another process' address space via ptrace. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + if (!tsk->ptrace || + (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return 0; + } + + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); + + return ret; +} + /* * ptrace a task: make the debugger its new parent and @@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; @@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); @@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); + retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); + if (!retval) { if (copied) break; @@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, + retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) @@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), + copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), + ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; @@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), + ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bff9c774987a..f7ce79a46050 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -41,8 +41,7 @@ * outside of a lifetime-guarded section. In general, this * is only needed for handling filters shared across tasks. * @prev: points to a previously installed, or inherited, filter - * @len: the number of instructions in the program - * @insnsi: the BPF program instructions to evaluate + * @prog: the BPF program to evaluate * * seccomp_filter objects are organized in a tree linked via the @prev * pointer. For any task, it appears to be a singly-linked list starting @@ -168,8 +167,8 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) } /** - * seccomp_run_filters - evaluates all seccomp filters against @syscall - * @syscall: number of the current system call + * seccomp_run_filters - evaluates all seccomp filters against @sd + * @sd: optional seccomp data to be passed to filters * * Returns valid seccomp BPF response codes. */ |