aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
author Martin KaFai Lau 2018-08-08 01:01:26 -0700
committer Daniel Borkmann 2018-08-11 01:58:46 +0200
commit 8217ca653ec601246832d562207bc24bdf652d2f (patch)
tree df0a3c46c4f4765cdec2e9027b261f5a70e6a23d /net/core
parent 2dbb9b9e6df67d444fbe425c7f6014858d337adf (diff)
bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport selection
This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in the earlier patch. "bpf_run_sk_reuseport()" will return -ECONNREFUSED when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP. The callers, in inet[6]_hashtable.c and ipv[46]/udp.c, are modified to handle this case and return NULL immediately instead of continuing the sk search from its hashtable.

It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach BPF_PROG_TYPE_SK_REUSEPORT. The "sk_reuseport_attach_bpf()" will check if the attaching bpf prog is in the new SK_REUSEPORT or the existing SOCKET_FILTER type and then check different things accordingly.

One level of "__reuseport_attach_prog()" call is removed. The "sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed back to "reuseport_attach_prog()" in sock_reuseport.c. sock_reuseport.c seems to have more knowledge of those test requirements than filter.c. In "reuseport_attach_prog()", after new_prog is attached to reuse->prog, the old_prog (if any) is also directly freed instead of returning the old_prog to the caller and asking the caller to free.

The sysctl_optmem_max check is moved back to the "sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()". As with other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only bounded by the usual "bpf_prog_charge_memlock()" during load time instead of being bounded by both bpf_prog_charge_memlock and sysctl_optmem_max.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/filter.c 87
-rw-r--r-- net/core/sock_reuseport.c 36
2 files changed, 78 insertions, 45 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 142595b4e0d1..22906b31d43f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}
-static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
-{
- struct bpf_prog *old_prog;
- int err;
-
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
- return -ENOMEM;
-
- if (sk_unhashed(sk) && sk->sk_reuseport) {
- err = reuseport_alloc(sk, false);
- if (err)
- return err;
- } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
- /* The socket wasn't bound with SO_REUSEPORT */
- return -EINVAL;
- }
-
- old_prog = reuseport_attach_prog(sk, prog);
- if (old_prog)
- bpf_prog_destroy(old_prog);
-
- return 0;
-}
-
static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __reuseport_attach_prog(prog, sk);
- if (err < 0) {
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ err = -ENOMEM;
+ else
+ err = reuseport_attach_prog(sk, prog);
+
+ if (err)
__bpf_prog_release(prog);
- return err;
- }
- return 0;
+ return err;
}
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
- struct bpf_prog *prog = __get_bpf(ufd, sk);
+ struct bpf_prog *prog;
int err;
+ if (sock_flag(sk, SOCK_FILTER_LOCKED))
+ return -EPERM;
+
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __reuseport_attach_prog(prog, sk);
- if (err < 0) {
- bpf_prog_put(prog);
- return err;
+ if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
+ /* Like other non BPF_PROG_TYPE_SOCKET_FILTER
+ * bpf prog (e.g. sockmap). It depends on the
+ * limitation imposed by bpf_prog_load().
+ * Hence, sysctl_optmem_max is not checked.
+ */
+ if ((sk->sk_type != SOCK_STREAM &&
+ sk->sk_type != SOCK_DGRAM) ||
+ (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_TCP) ||
+ (sk->sk_family != AF_INET &&
+ sk->sk_family != AF_INET6)) {
+ err = -ENOTSUPP;
+ goto err_prog_put;
+ }
+ } else {
+ /* BPF_PROG_TYPE_SOCKET_FILTER */
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ err = -ENOMEM;
+ goto err_prog_put;
+ }
}
- return 0;
+ err = reuseport_attach_prog(sk, prog);
+err_prog_put:
+ if (err)
+ bpf_prog_put(prog);
+
+ return err;
+}
+
+void sk_reuseport_prog_free(struct bpf_prog *prog)
+{
+ if (!prog)
+ return;
+
+ if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+ bpf_prog_put(prog);
+ else
+ bpf_prog_destroy(prog);
}
struct bpf_scratchpad {
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index d260167f5f77..ba5cba56f574 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -9,6 +9,7 @@
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
+#include <linux/filter.h>
#include <linux/rcupdate.h>
#define INIT_SOCKS 128
@@ -133,8 +134,7 @@ static void reuseport_free_rcu(struct rcu_head *head)
struct sock_reuseport *reuse;
reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
+ sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
if (reuse->reuseport_id)
ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
kfree(reuse);
@@ -219,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
}
EXPORT_SYMBOL(reuseport_detach_sock);
-static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
- struct bpf_prog *prog, struct sk_buff *skb,
- int hdr_len)
+static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ int hdr_len)
{
struct sk_buff *nskb = NULL;
u32 index;
@@ -282,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
/* paired with smp_wmb() in reuseport_add_sock() */
smp_rmb();
- if (prog && skb)
- sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+ if (!prog || !skb)
+ goto select_by_hash;
+
+ if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+ sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+ else
+ sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
+select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -296,12 +302,21 @@ out:
}
EXPORT_SYMBOL(reuseport_select_sock);
-struct bpf_prog *
-reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
+int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;
+ if (sk_unhashed(sk) && sk->sk_reuseport) {
+ int err = reuseport_alloc(sk, false);
+
+ if (err)
+ return err;
+ } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
+ /* The socket wasn't bound with SO_REUSEPORT */
+ return -EINVAL;
+ }
+
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
@@ -310,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
rcu_assign_pointer(reuse->prog, prog);
spin_unlock_bh(&reuseport_lock);
- return old_prog;
+ sk_reuseport_prog_free(old_prog);
+ return 0;
}
EXPORT_SYMBOL(reuseport_attach_prog);