diff options
author | Linus Torvalds | 2023-04-20 11:03:51 -0700 |
---|---|---|
committer | Linus Torvalds | 2023-04-20 11:03:51 -0700 |
commit | 23309d600db1abb73b77ca35db96133b7fc35959 (patch) | |
tree | 636734ca5a79ee73d543cdb1def2ea50e5266a03 /net | |
parent | cb0856346a60fe3eb837ba5e73588a41f81ac05f (diff) | |
parent | 927cdea5d2095287ddd5246e5aa68eb5d68db2be (diff) |
Merge tag 'net-6.3-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from netfilter and bpf.
There are a few fixes for new code bugs, including the Mellanox one
noted in the last networking pull. No known regressions outstanding.
Current release - regressions:
- sched: clear actions pointer in miss cookie init fail
- mptcp: fix accept vs worker race
- bpf: fix bpf_arch_text_poke() with new_addr == NULL on s390
- eth: bnxt_en: fix a possible NULL pointer dereference in unload
path
- eth: veth: take into account peer device for
NETDEV_XDP_ACT_NDO_XMIT xdp_features flag
Current release - new code bugs:
- eth: revert "net/mlx5: Enable management PF initialization"
Previous releases - regressions:
- netfilter: fix recent physdev match breakage
- bpf: fix incorrect verifier pruning due to missing register
precision taints
- eth: virtio_net: fix overflow inside xdp_linearize_page()
- eth: cxgb4: fix use after free bugs caused by circular dependency
problem
- eth: mlxsw: pci: fix possible crash during initialization
Previous releases - always broken:
- sched: sch_qfq: prevent slab-out-of-bounds in qfq_activate_agg
- netfilter: validate catch-all set elements
- bridge: don't notify FDB entries with "master dynamic"
- eth: bonding: fix memory leak when changing bond type to ethernet
- eth: i40e: fix accessing vsi->active_filters without holding lock
Misc:
- Mat is back as MPTCP co-maintainer"
* tag 'net-6.3-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (33 commits)
net: bridge: switchdev: don't notify FDB entries with "master dynamic"
Revert "net/mlx5: Enable management PF initialization"
MAINTAINERS: Resume MPTCP co-maintainer role
mailmap: add entries for Mat Martineau
e1000e: Disable TSO on i219-LM card to increase speed
bnxt_en: fix free-runnig PHC mode
net: dsa: microchip: ksz8795: Correctly handle huge frame configuration
bpf: Fix incorrect verifier pruning due to missing register precision taints
hamradio: drop ISA_DMA_API dependency
mlxsw: pci: Fix possible crash during initialization
mptcp: fix accept vs worker race
mptcp: stops worker on unaccepted sockets at listener close
net: rpl: fix rpl header size calculation
net: vmxnet3: Fix NULL pointer dereference in vmxnet3_rq_rx_complete()
bonding: Fix memory leak when changing bond type to Ethernet
veth: take into account peer device for NETDEV_XDP_ACT_NDO_XMIT xdp_features flag
mlxfw: fix null-ptr-deref in mlxfw_mfa2_tlv_next()
bnxt_en: Fix a possible NULL pointer dereference in unload path
bnxt_en: Do not initialize PTP on older P3/P4 chips
netfilter: nf_tables: tighten netlink attribute requirements for catch-all elements
...
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br_netfilter_hooks.c | 17 | ||||
-rw-r--r-- | net/bridge/br_switchdev.c | 11 | ||||
-rw-r--r-- | net/ipv6/rpl.c | 3 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 74 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 2 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 80 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 69 | ||||
-rw-r--r-- | net/netfilter/nft_lookup.c | 36 | ||||
-rw-r--r-- | net/sched/cls_api.c | 3 | ||||
-rw-r--r-- | net/sched/sch_qfq.c | 13 |
10 files changed, 228 insertions, 80 deletions
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 638a4d5359db..4bc6761517bb 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -868,12 +868,17 @@ static unsigned int ip_sabotage_in(void *priv, { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - if (nf_bridge && !nf_bridge->in_prerouting && - !netif_is_l3_master(skb->dev) && - !netif_is_l3_slave(skb->dev)) { - nf_bridge_info_free(skb); - state->okfn(state->net, state->sk, skb); - return NF_STOLEN; + if (nf_bridge) { + if (nf_bridge->sabotage_in_done) + return NF_ACCEPT; + + if (!nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev) && + !netif_is_l3_slave(skb->dev)) { + nf_bridge->sabotage_in_done = 1; + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } } return NF_ACCEPT; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index de18e9c1d7a7..ba95c4d74a60 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -148,6 +148,17 @@ br_switchdev_fdb_notify(struct net_bridge *br, if (test_bit(BR_FDB_LOCKED, &fdb->flags)) return; + /* Entries with these flags were created using ndm_state == NUD_REACHABLE, + * ndm_flags == NTF_MASTER( | NTF_STICKY), ext_flags == 0 by something + * equivalent to 'bridge fdb add ... master dynamic (sticky)'. + * Drivers don't know how to deal with these, so don't notify them to + * avoid confusing them. + */ + if (test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags) && + !test_bit(BR_FDB_STATIC, &fdb->flags) && + !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) + return; + br_switchdev_fdb_populate(br, &item, fdb, NULL); switch (type) { diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c index 488aec9e1a74..d1876f192225 100644 --- a/net/ipv6/rpl.c +++ b/net/ipv6/rpl.c @@ -32,7 +32,8 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i) size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, unsigned char cmpre) { - return (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre); + return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) + + IPV6_PFXTAIL_LEN(cmpre); } void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 06c5872e3b00..b998e9df53ce 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2315,7 +2315,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, unsigned int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - bool need_push, dispose_it; + bool dispose_it, need_push = false; + + /* If the first subflow moved to a close state before accept, e.g. due + * to an incoming reset, mptcp either: + * - if either the subflow or the msk are dead, destroy the context + * (the subflow socket is deleted by inet_child_forget) and the msk + * - otherwise do nothing at the moment and take action at accept and/or + * listener shutdown - user-space must be able to accept() the closed + * socket. + */ + if (msk->in_accept_queue && msk->first == ssk) { + if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) + return; + + /* ensure later check in mptcp_worker() will dispose the msk */ + sock_set_flag(sk, SOCK_DEAD); + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_drop_ctx(ssk); + goto out_release; + } dispose_it = !msk->subflow || ssk != msk->subflow->sk; if (dispose_it) @@ -2351,28 +2370,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (!inet_csk(ssk)->icsk_ulp_ops) { WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); kfree_rcu(subflow, rcu); - } else if (msk->in_accept_queue && msk->first == ssk) { - /* if the first subflow moved to a close state, e.g. due to - * incoming reset and we reach here before inet_child_forget() - * the TCP stack could later try to close it via - * inet_csk_listen_stop(), or deliver it to the user space via - * accept(). - * We can't delete the subflow - or risk a double free - nor let - * the msk survive - or will be leaked in the non accept scenario: - * fallback and let TCP cope with the subflow cleanup. - */ - WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); - mptcp_subflow_drop_ctx(ssk); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) + if (ssk->sk_state == TCP_LISTEN) { + tcp_set_state(ssk, TCP_CLOSE); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + } __tcp_close(ssk, 0); /* close acquired an extra ref */ __sock_put(ssk); } + +out_release: release_sock(ssk); sock_put(ssk); @@ -2427,21 +2440,14 @@ static void __mptcp_close_subflow(struct sock *sk) mptcp_close_ssk(sk, ssk, subflow); } - /* if the MPC subflow has been closed before the msk is accepted, - * msk will never be accept-ed, close it now - */ - if (!msk->first && msk->in_accept_queue) { - sock_set_flag(sk, SOCK_DEAD); - inet_sk_state_store(sk, TCP_CLOSE); - } } -static bool mptcp_check_close_timeout(const struct sock *sk) +static bool mptcp_should_close(const struct sock *sk) { s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; struct mptcp_subflow_context *subflow; - if (delta >= TCP_TIMEWAIT_LEN) + if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) return true; /* if all subflows are in closed status don't bother with additional @@ -2649,7 +2655,7 @@ static void mptcp_worker(struct work_struct *work) * even if it is orphaned and in FIN_WAIT2 state */ if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_check_close_timeout(sk)) { + if (mptcp_should_close(sk)) { inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); } @@ -2895,6 +2901,14 @@ static void __mptcp_destroy_sock(struct sock *sk) sock_put(sk); } +void __mptcp_unaccepted_force_close(struct sock *sk) +{ + sock_set_flag(sk, SOCK_DEAD); + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + __mptcp_destroy_sock(sk); +} + static __poll_t mptcp_check_readable(struct mptcp_sock *msk) { /* Concurrent splices from sk_receive_queue into receive_queue will @@ -3733,6 +3747,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } + + /* Do late cleanup for the first subflow as necessary. Also + * deal with bad peers not doing a complete shutdown. + */ + if (msk->first && + unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { + __mptcp_close_ssk(newsk, msk->first, + mptcp_subflow_ctx(msk->first), 0); + if (unlikely(list_empty(&msk->conn_list))) + inet_sk_state_store(newsk, TCP_CLOSE); + } + release_sock(newsk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 339a6f072989..d6469b6ab38e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -629,10 +629,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); +void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d34588850545..281c1cc8dc8d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -723,9 +723,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk) if (!ctx) return; - subflow_ulp_fallback(ssk, ctx); - if (ctx->conn) - sock_put(ctx->conn); + list_del(&mptcp_subflow_ctx(ssk)->node); + if (inet_csk(ssk)->icsk_ulp_ops) { + subflow_ulp_fallback(ssk, ctx); + if (ctx->conn) + sock_put(ctx->conn); + } kfree_rcu(ctx, rcu); } @@ -1819,6 +1822,77 @@ static void subflow_state_change(struct sock *sk) } } +void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) +{ + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; + struct sock *sk; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); + for (req = queue->rskq_accept_head; req; req = req->dl_next) { + struct mptcp_subflow_context *subflow; + struct sock *ssk = req->sk; + + if (!sk_is_mptcp(ssk)) + continue; + + subflow = mptcp_subflow_ctx(ssk); + if (!subflow || !subflow->conn) + continue; + + /* skip if already in list */ + sk = subflow->conn; + msk = mptcp_sk(sk); + if (msk->dl_next || msk == head) + continue; + + sock_hold(sk); + msk->dl_next = head; + head = msk; + } + spin_unlock_bh(&queue->rskq_lock); + if (!head) + return; + + /* can't acquire the msk socket lock under the subflow one, + * or will cause ABBA deadlock + */ + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { + sk = (struct sock *)msk; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + next = msk->dl_next; + msk->dl_next = NULL; + + __mptcp_unaccepted_force_close(sk); + release_sock(sk); + + /* lockdep will report a false positive ABBA deadlock + * between cancel_work_sync and the listener socket. + * The involved locks belong to different sockets WRT + * the existing AB chain. + * Using a per socket key is problematic as key + * deregistration requires process context and must be + * performed at socket disposal time, in atomic + * context. + * Just tell lockdep to consider the listener socket + * released here. + */ + mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); + mptcp_cancel_work(sk); + mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_); + + sock_put(sk); + } + + /* we are still under the listener msk socket lock */ + lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); +} + static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6004d4b24451..e48ab8dfb541 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3447,6 +3447,64 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) return 0; } +int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_ctx *pctx = (struct nft_ctx *)ctx; + const struct nft_data *data; + int err; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) + return 0; + + data = nft_set_ext_data(ext); + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + pctx->level++; + err = nft_chain_validate(ctx, data->verdict.chain); + if (err < 0) + return err; + pctx->level--; + break; + default: + break; + } + + return 0; +} + +struct nft_set_elem_catchall { + struct list_head list; + struct rcu_head rcu; + void *elem; +}; + +int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + int ret = 0; + + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + ret = nft_setelem_validate(ctx, set, NULL, &elem); + if (ret < 0) + return ret; + } + + return ret; +} + static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla); @@ -4759,12 +4817,6 @@ err_set_name: return err; } -struct nft_set_elem_catchall { - struct list_head list; - struct rcu_head rcu; - void *elem; -}; - static void nft_set_catchall_destroy(const struct nft_ctx *ctx, struct nft_set *set) { @@ -6056,7 +6108,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) + if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) || + (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY])) return -EINVAL; if (flags != 0) { @@ -7052,7 +7105,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, } if (nla[NFTA_OBJ_USERDATA]) { - obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL); + obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL_ACCOUNT); if (obj->udata == NULL) goto err_userdata; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index cae5a6724163..cecf8ab90e58 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -199,37 +199,6 @@ nla_put_failure: return -1; } -static int nft_lookup_validate_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_set_elem *elem) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); - struct nft_ctx *pctx = (struct nft_ctx *)ctx; - const struct nft_data *data; - int err; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - pctx->level++; - err = nft_chain_validate(ctx, data->verdict.chain); - if (err < 0) - return err; - pctx->level--; - break; - default: - break; - } - - return 0; -} - static int nft_lookup_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **d) @@ -245,9 +214,12 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, iter.skip = 0; iter.count = 0; iter.err = 0; - iter.fn = nft_lookup_validate_setelem; + iter.fn = nft_setelem_validate; priv->set->ops->walk(ctx, priv->set, &iter); + if (!iter.err) + iter.err = nft_set_catchall_validate(ctx, priv->set); + if (iter.err < 0) return iter.err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2a6b6be0811b..35785a36c802 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3235,6 +3235,9 @@ int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, err_miss_alloc: tcf_exts_destroy(exts); +#ifdef CONFIG_NET_CLS_ACT + exts->actions = NULL; +#endif return err; } EXPORT_SYMBOL(tcf_exts_init_ex); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index cf5ebe43b3b4..02098a02943e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -421,15 +421,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } else weight = 1; - if (tb[TCA_QFQ_LMAX]) { + if (tb[TCA_QFQ_LMAX]) lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - } else + else lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; |