author     David S. Miller  2020-06-13 15:28:08 -0700
committer  David S. Miller  2020-06-13 15:28:08 -0700
commit     fa7566a0d68f8467846cba8ec29f1551b0a42de9 (patch)
tree       c520797b309d68cacd4bb9089473af7bf8644f07 /net
parent     bf97bac9dc6481e9f68992e52bed5cc4b210e636 (diff)
parent     29fcb05bbf1a7008900bb9bee347bdbfc7171036 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says:

====================
pull-request: bpf 2020-06-12

The following pull-request contains BPF updates for your *net* tree.

We've added 26 non-merge commits during the last 10 day(s) which contain
a total of 27 files changed, 348 insertions(+), 93 deletions(-).

The main changes are:

1) sock_hash accounting fix, from Andrey.

2) libbpf fix and probe_mem sanitizing, from Andrii.

3) sock_hash fixes, from Jakub.

4) devmap_val fix, from Jesper.

5) load_bytes_relative fix, from YiFei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--  net/core/filter.c    19
-rw-r--r--  net/core/sock_map.c  38
-rw-r--r--  net/ipv4/tcp_bpf.c    6
-rw-r--r--  net/xdp/xsk.c         4
4 files changed, 48 insertions(+), 19 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 209482a4eaa2..73395384afe2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
u8 *end = skb_tail_pointer(skb);
- u8 *net = skb_network_header(skb);
- u8 *mac = skb_mac_header(skb);
- u8 *ptr;
+ u8 *start, *ptr;
- if (unlikely(offset > 0xffff || len > (end - mac)))
+ if (unlikely(offset > 0xffff))
goto err_clear;
switch (start_header) {
case BPF_HDR_START_MAC:
- ptr = mac + offset;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ goto err_clear;
+ start = skb_mac_header(skb);
break;
case BPF_HDR_START_NET:
- ptr = net + offset;
+ start = skb_network_header(skb);
break;
default:
goto err_clear;
}
- if (likely(ptr >= mac && ptr + len <= end)) {
+ ptr = start + offset;
+
+ if (likely(ptr + len <= end)) {
memcpy(to, ptr, len);
return 0;
}
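
After this change the MAC header is required only for BPF_HDR_START_MAC, so the helper also works from attach points where skb_mac_header() is not set (the cgroup egress path is one such case). A rough sketch of a caller, assuming a libbpf/clang BPF build; the program name and the TCP-only policy are made up for illustration:

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int load_net_header(struct __sk_buff *skb)
{
        struct iphdr iph;

        /* Read the IPv4 header relative to the network header; with the
         * fix this no longer fails just because no MAC header was set.
         */
        if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
                                        BPF_HDR_START_NET))
                return 1;       /* could not read, let the packet pass */

        /* hypothetical policy: allow only TCP (1 = allow, 0 = drop) */
        return iph.protocol == IPPROTO_TCP;
}

char _license[] SEC("license") = "GPL";
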
@@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
break;
case SO_BINDTODEVICE:
- ret = -ENOPROTOOPT;
-#ifdef CONFIG_NETDEVICES
optlen = min_t(long, optlen, IFNAMSIZ - 1);
strncpy(devname, optval, optlen);
devname[optlen] = 0;
@@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
}
ret = sock_bindtoindex(sk, ifindex, false);
-#endif
break;
default:
ret = -EINVAL;
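
For context, this SO_BINDTODEVICE branch is reached from BPF via bpf_setsockopt(). A rough sketch of a sockops program exercising it, assuming libbpf; the hook point and interface name are illustrative only:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef SOL_SOCKET
#define SOL_SOCKET      1
#endif
#ifndef SO_BINDTODEVICE
#define SO_BINDTODEVICE 25
#endif

SEC("sockops")
int bind_egress_dev(struct bpf_sock_ops *skops)
{
        char dev[] = "eth0";    /* hypothetical interface */

        /* Pin outgoing TCP connections of this cgroup to one device. */
        if (skops->op == BPF_SOCK_OPS_TCP_CONNECT_CB)
                bpf_setsockopt(skops, SOL_SOCKET, SO_BINDTODEVICE,
                               dev, sizeof(dev));
        return 1;
}

char _license[] SEC("license") = "GPL";
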
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 00a26cf2cfe9..4059f94e9bb5 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -424,10 +424,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
-static bool sock_map_redirect_allowed(const struct sock *sk)
-{
- return sk->sk_state != TCP_LISTEN;
-}
+static bool sock_map_redirect_allowed(const struct sock *sk);
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
@@ -508,6 +505,11 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+static bool sock_map_redirect_allowed(const struct sock *sk)
+{
+ return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+}
+
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
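
The reordered definition lets sock_map_redirect_allowed() reuse sk_is_tcp(), so only TCP sockets that are not listening are accepted when a looked-up socket is used as a redirect target. For context, a sketch of the kind of verdict program whose bpf_sk_redirect_map() call relies on that check; the map layout and key are hypothetical:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_SOCKMAP);
        __uint(max_entries, 2);
        __type(key, __u32);
        __type(value, __u64);
} sock_map SEC(".maps");

SEC("sk_skb/stream_verdict")
int prog_verdict(struct __sk_buff *skb)
{
        __u32 key = 0;  /* hypothetical slot filled in from user space */

        /* The helper drops the skb if the target socket in the map is
         * not a non-listening TCP socket.
         */
        return bpf_sk_redirect_map(skb, &sock_map, key, BPF_F_INGRESS);
}

char _license[] SEC("license") = "GPL";
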
@@ -989,11 +991,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
err = -EINVAL;
goto free_htab;
}
+ err = bpf_map_charge_init(&htab->map.memory, cost);
+ if (err)
+ goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
+ bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
@@ -1013,6 +1019,7 @@ static void sock_hash_free(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct bpf_htab_bucket *bucket;
+ struct hlist_head unlink_list;
struct bpf_htab_elem *elem;
struct hlist_node *node;
int i;
@@ -1024,13 +1031,32 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
- hlist_del_rcu(&elem->node);
+
+ /* We are racing with sock_hash_delete_from_link to
+ * enter the spin-lock critical section. Every socket on
+ * the list is still linked to sockhash. Since link
+ * exists, psock exists and holds a ref to socket. That
+ * lets us grab a socket ref too.
+ */
+ raw_spin_lock_bh(&bucket->lock);
+ hlist_for_each_entry(elem, &bucket->head, node)
+ sock_hold(elem->sk);
+ hlist_move_list(&bucket->head, &unlink_list);
+ raw_spin_unlock_bh(&bucket->lock);
+
+ /* Process removed entries out of atomic context to
+ * block for socket lock before deleting the psock's
+ * link to sockhash.
+ */
+ hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
+ hlist_del(&elem->node);
lock_sock(elem->sk);
rcu_read_lock();
sock_map_unref(elem->sk, elem);
rcu_read_unlock();
release_sock(elem->sk);
+ sock_put(elem->sk);
+ sock_hash_free_elem(htab, elem);
}
}
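
The comments above carry the idea: pin each socket and splice the bucket onto a private list while the bucket lock is held, then do the cleanup that needs lock_sock() outside the atomic section. The same collect-under-lock, release-outside-lock shape in stand-alone C, purely as an illustration (all names and types here are invented):

#include <pthread.h>
#include <stdlib.h>

struct elem {
        struct elem *next;
        /* ... payload ... */
};

struct bucket {
        pthread_spinlock_t lock;
        struct elem *head;
};

static void bucket_free(struct bucket *b)
{
        struct elem *unlink_list, *e, *next;

        /* Phase 1: under the spinlock, just steal the whole list.
         * (The kernel version also takes a reference on each element
         * here so nobody else can free it in the meantime.)
         */
        pthread_spin_lock(&b->lock);
        unlink_list = b->head;
        b->head = NULL;
        pthread_spin_unlock(&b->lock);

        /* Phase 2: walk and release outside the lock, where it is safe
         * to take sleeping locks (lock_sock() in the kernel version).
         */
        for (e = unlink_list; e; e = next) {
                next = e->next;
                free(e);
        }
}
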
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 629aaa9a1eb9..7aa68f4aae6c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
} while (i != msg_rx->sg.end);
if (unlikely(peek)) {
+ if (msg_rx == list_last_entry(&psock->ingress_msg,
+ struct sk_msg, list))
+ break;
msg_rx = list_next_entry(msg_rx, list);
continue;
}
@@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int ret = 0;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
if (!timeo)
return ret;
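
With the RCV_SHUTDOWN check, tcp_bpf_wait_data() reports readiness immediately for a shut-down receive side, so a blocking read returns EOF instead of sleeping out the timeout. A user-space illustration of the expected behaviour; the helper is hypothetical, not taken from the selftests:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>

static int expect_eof_after_shutdown(int fd)
{
        char buf[128];
        ssize_t n;

        /* After SHUT_RD, a blocking recv() on a psock-backed TCP socket
         * should come back promptly rather than waiting out SO_RCVTIMEO.
         */
        shutdown(fd, SHUT_RD);
        n = recv(fd, buf, sizeof(buf), 0);
        if (n == 0) {
                puts("got EOF without blocking");
                return 0;
        }
        if (n < 0)
                perror("recv");
        return -1;
}
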
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b6c0f08bd80d..3700266229f6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk)
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
- if (unlikely(!skb)) {
- err = -EAGAIN;
+ if (unlikely(!skb))
goto out;
- }
skb_put(skb, len);
addr = desc.addr;
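
Dropping the unconditional -EAGAIN lets the real return value of sock_alloc_send_skb() reach the sendto() caller that drives the AF_XDP TX ring. A hedged sketch of such a caller, loosely modelled on the xdpsock sample's kick routine; the retryable error set is illustrative:

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

/* Kick the kernel to transmit descriptors queued on an AF_XDP socket. */
static int kick_tx(int xsk_fd)
{
        if (sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0) >= 0)
                return 0;
        /* Transient conditions: try again on the next poll iteration. */
        if (errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
                return 0;
        /* Anything else is now the underlying allocation/driver error. */
        perror("sendto");
        return -1;
}
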