From ac28b1ec6135649b5d78b028e47264cb3ebca5ea Mon Sep 17 00:00:00 2001
From: Liu Jian
Date: Thu, 7 Sep 2023 10:57:09 +0800
Subject: net: ipv4: fix one memleak in __inet_del_ifa()

I got the below warning when do fuzzing test:
unregister_netdevice: waiting for bond0 to become free. Usage count = 2

It can be repoduced via:

ip link add bond0 type bond
sysctl -w net.ipv4.conf.bond0.promote_secondaries=1
ip addr add 4.117.174.103/0 scope 0x40 dev bond0
ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0
ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0
ip addr del 4.117.174.103/0 scope 0x40 dev bond0
ip link delete bond0 type bond

In this reproduction test case, an incorrect 'last_prim' is found in
__inet_del_ifa(), as a result, the secondary address(0.0.0.4/0 scope 0x40)
is lost. The memory of the secondary address is leaked and the reference of
in_device and net_device is leaked.

Fix this problem:
Look for 'last_prim' starting at location of the deleted IP and inserting
the promoted IP into the location of 'last_prim'.

Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9cf64ee47dd2..ca0ff15dc8fa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1;
-	struct in_ifaddr *last_prim;
+	struct in_ifaddr __rcu **last_prim;
 	struct in_ifaddr *prev_prom = NULL;
 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
 	ifa1 = rtnl_dereference(*ifap);
-	last_prim = rtnl_dereference(in_dev->ifa_list);
+	last_prim = ifap;
 	if (in_dev->dead)
 		goto no_promotions;
 
@@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 			    ifa1->ifa_scope <= ifa->ifa_scope)
-				last_prim = ifa;
+				last_prim = &ifa->ifa_next;
 
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 			    ifa1->ifa_mask != ifa->ifa_mask ||
@@ -440,9 +440,9 @@ no_promotions:
 
 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
 
-			last_sec = rtnl_dereference(last_prim->ifa_next);
+			last_sec = rtnl_dereference(*last_prim);
 			rcu_assign_pointer(promote->ifa_next, last_sec);
-			rcu_assign_pointer(last_prim->ifa_next, promote);
+			rcu_assign_pointer(*last_prim, promote);
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
-- 
cgit v1.2.3


From ced33ca07d8d99435ca3320c740ea947843005ca Mon Sep 17 00:00:00 2001
From: Juntong Deng
Date: Thu, 7 Sep 2023 00:26:03 +0800
Subject: selftests/net: Improve bind_bhash.sh to accommodate predictable
 network interface names

Starting with v197, systemd uses predictable interface network names,
the traditional interface naming scheme (eth0) is deprecated, therefore
it cannot be assumed that the eth0 interface exists on the host.

This modification makes the bind_bhash test program run in a separate
network namespace and no longer needs to consider the name of the
network interface on the host.

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/bind_bhash.sh | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh
index ca0292d4b441..a28563bdaae0 100755
--- a/tools/testing/selftests/net/bind_bhash.sh
+++ b/tools/testing/selftests/net/bind_bhash.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NR_FILES=32768
-SAVED_NR_FILES=$(ulimit -n)
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
 
 # default values
 port=443
@@ -36,21 +36,21 @@ while getopts "ha:p:64" opt; do
 done
 
 setup() {
+    ip netns add "${NETNS}"
+    ip -netns "${NETNS}" link add veth0 type veth peer name veth1
+    ip -netns "${NETNS}" link set lo up
+    ip -netns "${NETNS}" link set veth0 up
+    ip -netns "${NETNS}" link set veth1 up
+
     if [[ "$use_v6" == true ]]; then
-	ip addr add $addr_v6 nodad dev eth0
+        ip -netns "${NETNS}" addr add $addr_v6 nodad dev veth0
     else
-	ip addr add $addr_v4 dev lo
+        ip -netns "${NETNS}" addr add $addr_v4 dev lo
     fi
-	ulimit -n $NR_FILES
 }
 
 cleanup() {
-    if [[ "$use_v6" == true ]]; then
-	ip addr del $addr_v6 dev eth0
-    else
-	ip addr del $addr_v4/32 dev lo
-    fi
-    ulimit -n $SAVED_NR_FILES
+    ip netns del "${NETNS}"
 }
 
 if [[ "$addr" != "" ]]; then
@@ -59,8 +59,10 @@ if [[ "$addr" != "" ]]; then
 fi
 setup
 if [[ "$use_v6" == true ]] ; then
-    ./bind_bhash $port "ipv6" $addr_v6
+    ip netns exec "${NETNS}" sh -c \
+        "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv6 ${addr_v6}"
 else
-    ./bind_bhash $port "ipv4" $addr_v4
+    ip netns exec "${NETNS}" sh -c \
+        "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv4 ${addr_v4}"
 fi
 cleanup
-- 
cgit v1.2.3


From e73d1ab6cd7e7190bd891e521d270cd26ad8e40d Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Thu, 7 Sep 2023 11:55:12 +0200
Subject: net: bcmasp: add missing of_node_put

for_each_available_child_of_node performs an of_node_get
on each iteration, so a break out of the loop requires an
of_node_put.

This was done using the Coccinelle semantic patch
iterators/for_each_child.cocci

Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/asp2/bcmasp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index d63d321f3e7b..f048e3d45119 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1300,6 +1300,7 @@ static int bcmasp_probe(struct platform_device *pdev)
 		if (!intf) {
 			dev_err(dev, "Cannot create eth interface %d\n", i);
 			bcmasp_remove_intfs(priv);
+			of_node_put(intf_node);
 			goto of_put_exit;
 		}
 		list_add_tail(&intf->list, &priv->intfs);
-- 
cgit v1.2.3


From 281f65d29d6da1a9b6907fb0b145aaf34f4e4822 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan
Date: Thu, 7 Sep 2023 22:03:58 +0800
Subject: net: microchip: vcap api: Fix possible memory leak for
 vcap_dup_rule()

Inject fault When select CONFIG_VCAP_KUNIT_TEST, the below memory leak
occurs. If kzalloc() for duprule succeeds, but the following
kmemdup() fails, the duprule, ckf and caf memory will be leaked. So kfree
them in the error path.

unreferenced object 0xffff122744c50600 (size 192):
  comm "kunit_try_catch", pid 346, jiffies 4294896122 (age 911.812s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00  .'..........,...
    00 00 00 00 00 00 00 00 18 06 c5 44 27 12 ff ff  ...........D'...
  backtrace:
    [<00000000394b0db8>] __kmem_cache_alloc_node+0x274/0x2f8
    [<0000000001bedc67>] kmalloc_trace+0x38/0x88
    [<00000000b0612f98>] vcap_dup_rule+0x50/0x460
    [<000000005d2d3aca>] vcap_add_rule+0x8cc/0x1038
    [<00000000eef9d0f8>] test_vcap_xn_rule_creator.constprop.0.isra.0+0x238/0x494
    [<00000000cbda607b>] vcap_api_rule_remove_in_front_test+0x1ac/0x698
    [<00000000c8766299>] kunit_try_run_case+0xe0/0x20c
    [<00000000c4fe9186>] kunit_generic_run_threadfn_adapter+0x50/0x94
    [<00000000f6864acf>] kthread+0x2e8/0x374
    [<0000000022e639b3>] ret_from_fork+0x10/0x20

Fixes: 814e7693207f ("net: microchip: vcap api: Add a storage state to a VCAP rule")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c
index 300fe1a93dce..ef980e4e5bc2 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c
@@ -1021,18 +1021,32 @@ static struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri,
 	list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) {
 		newckf = kmemdup(ckf, sizeof(*newckf), GFP_KERNEL);
 		if (!newckf)
-			return ERR_PTR(-ENOMEM);
+			goto err;
 		list_add_tail(&newckf->ctrl.list, &duprule->data.keyfields);
 	}
 
 	list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) {
 		newcaf = kmemdup(caf, sizeof(*newcaf), GFP_KERNEL);
 		if (!newcaf)
-			return ERR_PTR(-ENOMEM);
+			goto err;
 		list_add_tail(&newcaf->ctrl.list, &duprule->data.actionfields);
 	}
 
 	return duprule;
+
+err:
+	list_for_each_entry_safe(ckf, newckf, &duprule->data.keyfields, ctrl.list) {
+		list_del(&ckf->ctrl.list);
+		kfree(ckf);
+	}
+
+	list_for_each_entry_safe(caf, newcaf, &duprule->data.actionfields, ctrl.list) {
+		list_del(&caf->ctrl.list);
+		kfree(caf);
+	}
+
+	kfree(duprule);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void vcap_apply_width(u8 *dst, int width, int bytes)
-- 
cgit v1.2.3


From 88e69af061f2e061a68751ef9cad47a674527a1b Mon Sep 17 00:00:00 2001
From: Ratheesh Kannoth
Date: Fri, 8 Sep 2023 08:23:09 +0530
Subject: octeontx2-pf: Fix page pool cache index corruption.

The access to page pool `cache' array and the `count' variable
is not locked. Page pool cache access is fine as long as there
is only one consumer per pool.

octeontx2 driver fills in rx buffers from page pool in NAPI context.
If system is stressed and could not allocate buffers, refiiling work
will be delegated to a delayed workqueue. This means that there are
two cosumers to the page pool cache.

Either workqueue or IRQ/NAPI can be run on other CPU. This will lead
to lock less access, hence corruption of cache pool indexes.

To fix this issue, NAPI is rescheduled from workqueue context to refill
rx buffers.

Fixes: b2e3406a38f0 ("octeontx2-pf: Add support for page pool")
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c |  6 ++-
 drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h |  2 +-
 .../ethernet/marvell/octeontx2/nic/otx2_common.c   | 43 +++-------------------
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   |  3 +-
 .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c   |  7 ++--
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 30 ++++++++++++---
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h |  4 +-
 7 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index 826f691de259..a4a258da8dd5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -107,12 +107,13 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
 }
 
 #define NPA_MAX_BURST 16
-void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
+int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 {
 	struct otx2_nic *pfvf = dev;
+	int cnt = cq->pool_ptrs;
 	u64 ptrs[NPA_MAX_BURST];
-	int num_ptrs = 1;
 	dma_addr_t bufptr;
+	int num_ptrs = 1;
 
 	/* Refill pool with new buffers */
 	while (cq->pool_ptrs) {
@@ -131,6 +132,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 			num_ptrs = 1;
 		}
 	}
+	return cnt - cq->pool_ptrs;
 }
 
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
index 8ae96815865e..c1861f7de254 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
@@ -24,7 +24,7 @@ static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
 	return weight;
 }
 
-void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
+int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
 int cn10k_lmtst_init(struct otx2_nic *pfvf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 8511906cb4e2..997fedac3a98 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -574,20 +574,8 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
 int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
 		      dma_addr_t *dma)
 {
-	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) {
-		struct refill_work *work;
-		struct delayed_work *dwork;
-
-		work = &pfvf->refill_wrk[cq->cq_idx];
-		dwork = &work->pool_refill_work;
-		/* Schedule a task if no other task is running */
-		if (!cq->refill_task_sched) {
-			cq->refill_task_sched = true;
-			schedule_delayed_work(dwork,
-					      msecs_to_jiffies(100));
-		}
+	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma)))
 		return -ENOMEM;
-	}
 	return 0;
 }
 
@@ -1082,39 +1070,20 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
 static void otx2_pool_refill_task(struct work_struct *work)
 {
 	struct otx2_cq_queue *cq;
-	struct otx2_pool *rbpool;
 	struct refill_work *wrk;
-	int qidx, free_ptrs = 0;
 	struct otx2_nic *pfvf;
-	dma_addr_t bufptr;
+	int qidx;
 
 	wrk = container_of(work, struct refill_work, pool_refill_work.work);
 	pfvf = wrk->pf;
 	qidx = wrk - pfvf->refill_wrk;
 	cq = &pfvf->qset.cq[qidx];
-	rbpool = cq->rbpool;
-	free_ptrs = cq->pool_ptrs;
 
-	while (cq->pool_ptrs) {
-		if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) {
-			/* Schedule a WQ if we fails to free atleast half of the
-			 * pointers else enable napi for this RQ.
-			 */
-			if (!((free_ptrs - cq->pool_ptrs) > free_ptrs / 2)) {
-				struct delayed_work *dwork;
-
-				dwork = &wrk->pool_refill_work;
-				schedule_delayed_work(dwork,
-						      msecs_to_jiffies(100));
-			} else {
-				cq->refill_task_sched = false;
-			}
-			return;
-		}
-		pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
-		cq->pool_ptrs--;
-	}
 	cq->refill_task_sched = false;
+
+	local_bh_disable();
+	napi_schedule(wrk->napi);
+	local_bh_enable();
 }
 
 int otx2_config_nix_queues(struct otx2_nic *pfvf)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 4c6032ee7800..c04a8ee53a82 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -302,6 +302,7 @@ struct flr_work {
 struct refill_work {
 	struct delayed_work pool_refill_work;
 	struct otx2_nic *pf;
+	struct napi_struct *napi;
 };
 
 /* PTPv2 originTimestamp structure */
@@ -370,7 +371,7 @@ struct dev_hw_ops {
 	int	(*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura);
 	void	(*sqe_flush)(void *dev, struct otx2_snd_queue *sq,
 			     int size, int qidx);
-	void	(*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq);
+	int	(*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq);
 	void	(*aura_freeptr)(void *dev, int aura, u64 buf);
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 70b9065f7d10..6daf4d58c25d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1943,6 +1943,10 @@ int otx2_stop(struct net_device *netdev)
 
 	netif_tx_disable(netdev);
 
+	for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++)
+		cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work);
+	devm_kfree(pf->dev, pf->refill_wrk);
+
 	otx2_free_hw_resources(pf);
 	otx2_free_cints(pf, pf->hw.cint_cnt);
 	otx2_disable_napi(pf);
@@ -1950,9 +1954,6 @@ int otx2_stop(struct net_device *netdev)
 	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
 		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
 
-	for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++)
-		cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work);
-	devm_kfree(pf->dev, pf->refill_wrk);
 
 	kfree(qset->sq);
 	kfree(qset->cq);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index e369baf11530..e77d43848955 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -424,9 +424,10 @@ process_cqe:
 	return processed_cqe;
 }
 
-void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
+int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 {
 	struct otx2_nic *pfvf = dev;
+	int cnt = cq->pool_ptrs;
 	dma_addr_t bufptr;
 
 	while (cq->pool_ptrs) {
@@ -435,6 +436,8 @@ void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 		otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM);
 		cq->pool_ptrs--;
 	}
+
+	return cnt - cq->pool_ptrs;
 }
 
 static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
@@ -521,6 +524,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
 	struct otx2_cq_queue *cq;
 	struct otx2_qset *qset;
 	struct otx2_nic *pfvf;
+	int filled_cnt = -1;
 
 	cq_poll = container_of(napi, struct otx2_cq_poll, napi);
 	pfvf = (struct otx2_nic *)cq_poll->dev;
@@ -541,7 +545,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
 	}
 
 	if (rx_cq && rx_cq->pool_ptrs)
-		pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
+		filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
 	/* Clear the IRQ */
 	otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0));
 
@@ -561,9 +565,25 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
 				otx2_config_irq_coalescing(pfvf, i);
 		}
 
-		/* Re-enable interrupts */
-		otx2_write64(pfvf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
-			     BIT_ULL(0));
+		if (unlikely(!filled_cnt)) {
+			struct refill_work *work;
+			struct delayed_work *dwork;
+
+			work = &pfvf->refill_wrk[cq->cq_idx];
+			dwork = &work->pool_refill_work;
+			/* Schedule a task if no other task is running */
+			if (!cq->refill_task_sched) {
+				work->napi = napi;
+				cq->refill_task_sched = true;
+				schedule_delayed_work(dwork,
+						      msecs_to_jiffies(100));
+			}
+		} else {
+			/* Re-enable interrupts */
+			otx2_write64(pfvf,
+				     NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
+				     BIT_ULL(0));
+		}
 	}
 	return workdone;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 9e3bfbe5c480..a82ffca8ce1b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -170,6 +170,6 @@ void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq,
 		     int size, int qidx);
 void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq,
 		    int size, int qidx);
-void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
-void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
+int otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
+int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 #endif /* OTX2_TXRX_H */
-- 
cgit v1.2.3


From 6912e724832c47bb381eb1bd1e483ec8df0d0f0f Mon Sep 17 00:00:00 2001
From: Guangguan Wang
Date: Fri, 8 Sep 2023 11:31:42 +0800
Subject: net/smc: bugfix for smcr v2 server connect success statistic

In the macro SMC_STAT_SERV_SUCC_INC, the smcd_version is used
to determin whether to increase the v1 statistic or the v2
statistic. It is correct for SMCD. But for SMCR, smcr_version
should be used.

Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_stats.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index b60fe1eb37ab..aa8928975cc6 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -243,8 +243,9 @@ while (0)
 #define SMC_STAT_SERV_SUCC_INC(net, _ini) \
 do { \
 	typeof(_ini) i = (_ini); \
-	bool is_v2 = (i->smcd_version & SMC_V2); \
 	bool is_smcd = (i->is_smcd); \
+	u8 version = is_smcd ? i->smcd_version : i->smcr_version; \
+	bool is_v2 = (version & SMC_V2); \
 	typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
 	if (is_v2 && is_smcd) \
 		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
-- 
cgit v1.2.3


From f5146e3ef0a9eea405874b36178c19a4863b8989 Mon Sep 17 00:00:00 2001
From: Guangguan Wang
Date: Fri, 8 Sep 2023 11:31:43 +0800
Subject: net/smc: use smc_lgr_list.lock to protect smc_lgr_list.list iterate
 in smcr_port_add

While doing smcr_port_add, there maybe linkgroup add into or delete
from smc_lgr_list.list at the same time, which may result kernel crash.
So, use smc_lgr_list.lock to protect smc_lgr_list.list iterate in
smcr_port_add.

The crash calltrace show below:
BUG: kernel NULL pointer dereference, address: 0000000000000000
PGD 0 P4D 0
Oops: 0000 [#1] SMP NOPTI
CPU: 0 PID: 559726 Comm: kworker/0:92 Kdump: loaded Tainted: G
Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 449e491 04/01/2014
Workqueue: events smc_ib_port_event_work [smc]
RIP: 0010:smcr_port_add+0xa6/0xf0 [smc]
RSP: 0000:ffffa5a2c8f67de0 EFLAGS: 00010297
RAX: 0000000000000001 RBX: ffff9935e0650000 RCX: 0000000000000000
RDX: 0000000000000010 RSI: ffff9935e0654290 RDI: ffff9935c8560000
RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9934c0401918
R10: 0000000000000000 R11: ffffffffb4a5c278 R12: ffff99364029aae4
R13: ffff99364029aa00 R14: 00000000ffffffed R15: ffff99364029ab08
FS:  0000000000000000(0000) GS:ffff994380600000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000f06a10003 CR4: 0000000002770ef0
PKRU: 55555554
Call Trace:
 smc_ib_port_event_work+0x18f/0x380 [smc]
 process_one_work+0x19b/0x340
 worker_thread+0x30/0x370
 ? process_one_work+0x340/0x340
 kthread+0x114/0x130
 ? __kthread_cancel_work+0x50/0x50
 ret_from_fork+0x1f/0x30

Fixes: 1f90a05d9ff9 ("net/smc: add smcr_port_add() and smcr_link_up() processing")
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bd01dd31e4bd..d520ee62c8ec 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1662,6 +1662,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	struct smc_link_group *lgr, *n;
 
+	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
 		struct smc_link *link;
 
@@ -1680,6 +1681,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
 		if (link)
 			smc_llc_add_link_local(link);
 	}
+	spin_unlock_bh(&smc_lgr_list.lock);
 }
 
 /* link is down - switch connections to alternate link,
-- 
cgit v1.2.3


From fa60b8163816f194786f3ee334c9a458da7699c6 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch
Date: Thu, 7 Sep 2023 12:46:31 +0200
Subject: net: stmmac: fix handling of zero coalescing tx-usecs

Setting ethtool -C eth0 tx-usecs 0 is supposed to disable the use of the
coalescing timer but currently it gets programmed with zero delay
instead.

Disable the use of the coalescing timer if tx-usecs is zero by
preventing it from being restarted.  Note that to keep things simple we
don't start/stop the timer when the coalescing settings are changed, but
just let that happen on the next transmit or timer expiry.

Fixes: 8fce33317023 ("net: stmmac: Rework coalesce timer and fix multi-queue races")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9a3182b9e767..2206789802bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2704,9 +2704,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	/* We still have pending packets, let's call for a new scheduling */
 	if (tx_q->dirty_tx != tx_q->cur_tx)
-		hrtimer_start(&tx_q->txtimer,
-			      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
-			      HRTIMER_MODE_REL);
+		stmmac_tx_timer_arm(priv, queue);
 
 	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
 	tx_q->txq_stats.tx_packets += tx_packets;
@@ -2995,9 +2993,13 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
+	u32 tx_coal_timer = priv->tx_coal_timer[queue];
+
+	if (!tx_coal_timer)
+		return;
 
 	hrtimer_start(&tx_q->txtimer,
-		      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
+		      STMMAC_COAL_TIMER(tx_coal_timer),
 		      HRTIMER_MODE_REL);
 }
 
-- 
cgit v1.2.3


From 9b90aca97f6d5255ca41e716720d138b878cd034 Mon Sep 17 00:00:00 2001
From: Hangyu Hua
Date: Fri, 8 Sep 2023 14:19:48 +0800
Subject: net: ethernet: bcmasp: fix possible OOB write in
 bcmasp_netfilt_get_all_active()

rule_locs is allocated in ethtool_get_rxnfc and the size is determined by
rule_cnt from user space. So rule_cnt needs to be check before using
rule_locs to avoid OOB writing or NULL pointer dereference.

Fixes: c5d511c49587 ("net: bcmasp: Add support for wake on net filters")
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/asp2/bcmasp.c         | 9 +++++++--
 drivers/net/ethernet/broadcom/asp2/bcmasp.h         | 4 ++--
 drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c | 2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index f048e3d45119..41a6098eb0c2 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -528,13 +528,16 @@ void bcmasp_netfilt_suspend(struct bcmasp_intf *intf)
 				  ASP_RX_FILTER_BLK_CTRL);
 }
 
-void bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
-				   u32 *rule_cnt)
+int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
+				  u32 *rule_cnt)
 {
 	struct bcmasp_priv *priv = intf->parent;
 	int j = 0, i;
 
 	for (i = 0; i < NUM_NET_FILTERS; i++) {
+		if (j == *rule_cnt)
+			return -EMSGSIZE;
+
 		if (!priv->net_filters[i].claimed ||
 		    priv->net_filters[i].port != intf->port)
 			continue;
@@ -548,6 +551,8 @@ void bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
 	}
 
 	*rule_cnt = j;
+
+	return 0;
 }
 
 int bcmasp_netfilt_get_active(struct bcmasp_intf *intf)
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
index 5b512f7f5e94..ec90add6b03e 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h
@@ -577,8 +577,8 @@ void bcmasp_netfilt_release(struct bcmasp_intf *intf,
 
 int bcmasp_netfilt_get_active(struct bcmasp_intf *intf);
 
-void bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
-				   u32 *rule_cnt);
+int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
+				  u32 *rule_cnt);
 
 void bcmasp_netfilt_suspend(struct bcmasp_intf *intf);
 
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
index c4f1604d5ab3..ce6a3d56fb23 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c
@@ -335,7 +335,7 @@ static int bcmasp_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		err = bcmasp_flow_get(intf, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
-		bcmasp_netfilt_get_all_active(intf, rule_locs, &cmd->rule_cnt);
+		err = bcmasp_netfilt_get_all_active(intf, rule_locs, &cmd->rule_cnt);
 		cmd->data = NUM_NET_FILTERS;
 		break;
 	default:
-- 
cgit v1.2.3


From 51fe0a470543f345e3c62b6798929de3ddcedc1d Mon Sep 17 00:00:00 2001
From: Hangyu Hua
Date: Fri, 8 Sep 2023 14:19:49 +0800
Subject: net: ethernet: mvpp2_main: fix possible OOB write in
 mvpp2_ethtool_get_rxnfc()

rules is allocated in ethtool_get_rxnfc and the size is determined by
rule_cnt from user space. So rule_cnt needs to be check before using
rules to avoid OOB writing or NULL pointer dereference.

Fixes: 90b509b39ac9 ("net: mvpp2: cls: Add Classification offload support")
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Reviewed-by: Marcin Wojtas <mw@semihalf.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index eb74ccddb440..21c3f9b015c8 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5586,6 +5586,11 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
 		break;
 	case ETHTOOL_GRXCLSRLALL:
 		for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+			if (loc == info->rule_cnt) {
+				ret = -EMSGSIZE;
+				break;
+			}
+
 			if (port->rfs_rules[i])
 				rules[loc++] = i;
 		}
-- 
cgit v1.2.3


From e4c79810755f66c9a933ca810da2724133b1165a Mon Sep 17 00:00:00 2001
From: Hangyu Hua
Date: Fri, 8 Sep 2023 14:19:50 +0800
Subject: net: ethernet: mtk_eth_soc: fix possible NULL pointer dereference in
 mtk_hwlro_get_fdir_all()

rule_locs is allocated in ethtool_get_rxnfc and the size is determined by
rule_cnt from user space. So rule_cnt needs to be check before using
rule_locs to avoid NULL pointer dereference.

Fixes: 7aab747e5563 ("net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO")
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6ad42e3b488f..2372ce8c2580 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2994,6 +2994,9 @@ static int mtk_hwlro_get_fdir_all(struct net_device *dev,
 	int i;
 
 	for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+
 		if (mac->hwlro_ip[i]) {
 			rule_locs[cnt] = i;
 			cnt++;
-- 
cgit v1.2.3


From 484b4833c604c0adcf19eac1ca14b60b757355b5 Mon Sep 17 00:00:00 2001
From: Ziyang Xuan
Date: Fri, 8 Sep 2023 18:17:52 +0800
Subject: hsr: Fix uninit-value access in fill_frame_info()

Syzbot reports the following uninit-value access problem.

=====================================================
BUG: KMSAN: uninit-value in fill_frame_info net/hsr/hsr_forward.c:601 [inline]
BUG: KMSAN: uninit-value in hsr_forward_skb+0x9bd/0x30f0 net/hsr/hsr_forward.c:616
 fill_frame_info net/hsr/hsr_forward.c:601 [inline]
 hsr_forward_skb+0x9bd/0x30f0 net/hsr/hsr_forward.c:616
 hsr_dev_xmit+0x192/0x330 net/hsr/hsr_device.c:223
 __netdev_start_xmit include/linux/netdevice.h:4889 [inline]
 netdev_start_xmit include/linux/netdevice.h:4903 [inline]
 xmit_one net/core/dev.c:3544 [inline]
 dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3560
 __dev_queue_xmit+0x34d0/0x52a0 net/core/dev.c:4340
 dev_queue_xmit include/linux/netdevice.h:3082 [inline]
 packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276
 packet_snd net/packet/af_packet.c:3087 [inline]
 packet_sendmsg+0x8b1d/0x9f30 net/packet/af_packet.c:3119
 sock_sendmsg_nosec net/socket.c:730 [inline]
 sock_sendmsg net/socket.c:753 [inline]
 __sys_sendto+0x781/0xa30 net/socket.c:2176
 __do_sys_sendto net/socket.c:2188 [inline]
 __se_sys_sendto net/socket.c:2184 [inline]
 __ia32_sys_sendto+0x11f/0x1c0 net/socket.c:2184
 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
 __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
 entry_SYSENTER_compat_after_hwframe+0x70/0x82

Uninit was created at:
 slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767
 slab_alloc_node mm/slub.c:3478 [inline]
 kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523
 kmalloc_reserve+0x148/0x470 net/core/skbuff.c:559
 __alloc_skb+0x318/0x740 net/core/skbuff.c:644
 alloc_skb include/linux/skbuff.h:1286 [inline]
 alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6299
 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2794
 packet_alloc_skb net/packet/af_packet.c:2936 [inline]
 packet_snd net/packet/af_packet.c:3030 [inline]
 packet_sendmsg+0x70e8/0x9f30 net/packet/af_packet.c:3119
 sock_sendmsg_nosec net/socket.c:730 [inline]
 sock_sendmsg net/socket.c:753 [inline]
 __sys_sendto+0x781/0xa30 net/socket.c:2176
 __do_sys_sendto net/socket.c:2188 [inline]
 __se_sys_sendto net/socket.c:2184 [inline]
 __ia32_sys_sendto+0x11f/0x1c0 net/socket.c:2184
 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline]
 __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178
 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203
 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246
 entry_SYSENTER_compat_after_hwframe+0x70/0x82

It is because VLAN not yet supported in hsr driver. Return error
when protocol is ETH_P_8021Q in fill_frame_info() now to fix it.

Fixes: 451d8123f897 ("net: prp: add packet handling support")
Reported-by: syzbot+bf7e6250c7ce248f3ec9@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bf7e6250c7ce248f3ec9
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_forward.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 629daacc9607..b71dab630a87 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -594,6 +594,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
 		proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
 		/* FIXME: */
 		netdev_warn_once(skb->dev, "VLAN not yet supported");
+		return -EINVAL;
 	}
 
 	frame->is_from_san = false;
-- 
cgit v1.2.3


From 32530dba1bd48da4437d18d9a8dbc9d2826938a6 Mon Sep 17 00:00:00 2001
From: Ciprian Regus
Date: Fri, 8 Sep 2023 15:58:08 +0300
Subject: net:ethernet:adi:adin1110: Fix forwarding offload

Currently, when a new fdb entry is added (with both ports of the
ADIN2111 bridged), the driver configures the MAC filters for the wrong
port, which results in the forwarding being done by the host, and not
actually hardware offloaded.

The ADIN2111 offloads the forwarding by setting filters on the
destination MAC address of incoming frames. Based on these, they may be
routed to the other port. Thus, if a frame has to be forwarded from port
1 to port 2, the required configuration for the ADDR_FILT_UPRn register
should set the APPLY2PORT1 bit (instead of APPLY2PORT2, as it's
currently the case).

Fixes: bc93e19d088b ("net: ethernet: adi: Add ADIN1110 support")
Signed-off-by: Ciprian Regus <ciprian.regus@analog.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/adi/adin1110.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c
index 1c009b485188..ca66b747b7c5 100644
--- a/drivers/net/ethernet/adi/adin1110.c
+++ b/drivers/net/ethernet/adi/adin1110.c
@@ -1385,7 +1385,7 @@ static int adin1110_fdb_add(struct adin1110_port_priv *port_priv,
 		return -ENOMEM;
 
 	other_port = priv->ports[!port_priv->nr];
-	port_rules = adin1110_port_rules(port_priv, false, true);
+	port_rules = adin1110_port_rules(other_port, false, true);
 	eth_broadcast_addr(mask);
 
 	return adin1110_write_mac_address(other_port, mac_nr, (u8 *)fdb->addr,
-- 
cgit v1.2.3


From 02c652f5465011126152bbd93b6a582a1d0c32f1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Fri, 8 Sep 2023 16:33:48 +0300
Subject: net: dsa: sja1105: hide all multicast addresses from "bridge fdb
 show"

Commit 4d9423549501 ("net: dsa: sja1105: offload bridge port flags to
device") has partially hidden some multicast entries from showing up in
the "bridge fdb show" output, but it wasn't enough. Addresses which are
added through "bridge mdb add" still show up. Hide them all.

Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index a23d980d28f5..11c917d5ce43 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1868,13 +1868,14 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
 		if (!(l2_lookup.destports & BIT(port)))
 			continue;
 
-		/* We need to hide the FDB entry for unknown multicast */
-		if (l2_lookup.macaddr == SJA1105_UNKNOWN_MULTICAST &&
-		    l2_lookup.mask_macaddr == SJA1105_UNKNOWN_MULTICAST)
-			continue;
-
 		u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
+		/* Hardware FDB is shared for fdb and mdb, "bridge fdb show"
+		 * only wants to see unicast
+		 */
+		if (is_multicast_ether_addr(macaddr))
+			continue;
+
 		/* We need to hide the dsa_8021q VLANs from the user. */
 		if (vid_is_dsa_8021q(l2_lookup.vlanid))
 			l2_lookup.vlanid = 0;
-- 
cgit v1.2.3


From c956798062b5a308db96e75157747291197f0378 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Fri, 8 Sep 2023 16:33:49 +0300
Subject: net: dsa: sja1105: propagate exact error code from
 sja1105_dynamic_config_poll_valid()

Currently, sja1105_dynamic_config_wait_complete() returns either 0 or
-ETIMEDOUT, because it just looks at the read_poll_timeout() return code.

There will be future changes which move some more checks to
sja1105_dynamic_config_poll_valid(). It is important that we propagate
their exact return code (-ENOENT, -EINVAL), because callers of
sja1105_dynamic_config_read() depend on them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 7729d3f8b7f5..93d47dab8d3e 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -1211,13 +1211,14 @@ sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
 				     struct sja1105_dyn_cmd *cmd,
 				     const struct sja1105_dynamic_table_ops *ops)
 {
-	int rc;
-
-	return read_poll_timeout(sja1105_dynamic_config_poll_valid,
-				 rc, rc != -EAGAIN,
-				 SJA1105_DYNAMIC_CONFIG_SLEEP_US,
-				 SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
-				 false, priv, cmd, ops);
+	int err, rc;
+
+	err = read_poll_timeout(sja1105_dynamic_config_poll_valid,
+				rc, rc != -EAGAIN,
+				SJA1105_DYNAMIC_CONFIG_SLEEP_US,
+				SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
+				false, priv, cmd, ops);
+	return err < 0 ? err : rc;
 }
 
 /* Provides read access to the settings through the dynamic interface
-- 
cgit v1.2.3


From 7cef293b9a634a05fcce9e1df4aee3aeed023345 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Fri, 8 Sep 2023 16:33:50 +0300
Subject: net: dsa: sja1105: fix multicast forwarding working only for last
 added mdb entry

The commit cited in Fixes: did 2 things: it refactored the read-back
polling from sja1105_dynamic_config_read() into a new function,
sja1105_dynamic_config_wait_complete(), and it called that from
sja1105_dynamic_config_write() too.

What is problematic is the refactoring.

The refactored code from sja1105_dynamic_config_poll_valid() works like
the previous one, but the problem is that it uses another packed_buf[]
SPI buffer, and there was code at the end of sja1105_dynamic_config_read()
which was relying on the read-back packed_buf[]:

	/* Don't dereference possibly NULL pointer - maybe caller
	 * only wanted to see whether the entry existed or not.
	 */
	if (entry)
		ops->entry_packing(packed_buf, entry, UNPACK);

After the change, the packed_buf[] that this code sees is no longer the
entry read back from hardware, but the original entry that the caller
passed to the sja1105_dynamic_config_read(), packed into this buffer.

This difference is the most notable with the SJA1105_SEARCH uses from
sja1105pqrs_fdb_add() - used for both fdb and mdb. There, we have logic
added by commit 728db843df88 ("net: dsa: sja1105: ignore the FDB entry
for unknown multicast when adding a new address") to figure out whether
the address we're trying to add matches on any existing hardware entry,
with the exception of the catch-all multicast address.

That logic was broken, because with sja1105_dynamic_config_read() not
working properly, it doesn't return us the entry read back from
hardware, but the entry that we passed to it. And, since for multicast,
a match will always exist, it will tell us that any mdb entry already
exists at index=0 L2 Address Lookup table. It is index=0 because the
caller doesn't know the index - it wants to find it out, and
sja1105_dynamic_config_read() does:

	if (index < 0) { // SJA1105_SEARCH
		/* Avoid copying a signed negative number to an u64 */
		cmd.index = 0; // <- this
		cmd.search = true;
	} else {
		cmd.index = index;
		cmd.search = false;
	}

So, to the caller of sja1105_dynamic_config_read(), the returned info
looks entirely legit, and it will add all mdb entries to FDB index 0.
There, they will always overwrite each other (not to mention,
potentially they can also overwrite a pre-existing bridge fdb entry),
and the user-visible impact will be that only the last mdb entry will be
forwarded as it should. The others won't (will be flooded or dropped,
depending on the egress flood settings).

Fixing is a bit more complicated, and involves either passing the same
packed_buf[] to sja1105_dynamic_config_wait_complete(), or moving all
the extra processing on the packed_buf[] to
sja1105_dynamic_config_wait_complete(). I've opted for the latter,
because it makes sja1105_dynamic_config_wait_complete() a bit more
self-contained.

Fixes: df405910ab9f ("net: dsa: sja1105: wait for dynamic config command completion on writes too")
Reported-by: Yanan Yang <yanan.yang@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 80 +++++++++++-------------
 1 file changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 93d47dab8d3e..984c0e604e8d 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -1175,18 +1175,15 @@ const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
 
 static int
 sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
-				  struct sja1105_dyn_cmd *cmd,
-				  const struct sja1105_dynamic_table_ops *ops)
+				  const struct sja1105_dynamic_table_ops *ops,
+				  void *entry, bool check_valident,
+				  bool check_errors)
 {
 	u8 packed_buf[SJA1105_MAX_DYN_CMD_SIZE] = {};
+	struct sja1105_dyn_cmd cmd = {};
 	int rc;
 
-	/* We don't _need_ to read the full entry, just the command area which
-	 * is a fixed SJA1105_SIZE_DYN_CMD. But our cmd_packing() API expects a
-	 * buffer that contains the full entry too. Additionally, our API
-	 * doesn't really know how many bytes into the buffer does the command
-	 * area really begin. So just read back the whole entry.
-	 */
+	/* Read back the whole entry + command structure. */
 	rc = sja1105_xfer_buf(priv, SPI_READ, ops->addr, packed_buf,
 			      ops->packed_size);
 	if (rc)
@@ -1195,11 +1192,25 @@ sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
 	/* Unpack the command structure, and return it to the caller in case it
 	 * needs to perform further checks on it (VALIDENT).
 	 */
-	memset(cmd, 0, sizeof(*cmd));
-	ops->cmd_packing(packed_buf, cmd, UNPACK);
+	ops->cmd_packing(packed_buf, &cmd, UNPACK);
 
 	/* Hardware hasn't cleared VALID => still working on it */
-	return cmd->valid ? -EAGAIN : 0;
+	if (cmd.valid)
+		return -EAGAIN;
+
+	if (check_valident && !cmd.valident && !(ops->access & OP_VALID_ANYWAY))
+		return -ENOENT;
+
+	if (check_errors && cmd.errors)
+		return -EINVAL;
+
+	/* Don't dereference possibly NULL pointer - maybe caller
+	 * only wanted to see whether the entry existed or not.
+	 */
+	if (entry)
+		ops->entry_packing(packed_buf, entry, UNPACK);
+
+	return 0;
 }
 
 /* Poll the dynamic config entry's control area until the hardware has
@@ -1208,8 +1219,9 @@ sja1105_dynamic_config_poll_valid(struct sja1105_private *priv,
  */
 static int
 sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
-				     struct sja1105_dyn_cmd *cmd,
-				     const struct sja1105_dynamic_table_ops *ops)
+				     const struct sja1105_dynamic_table_ops *ops,
+				     void *entry, bool check_valident,
+				     bool check_errors)
 {
 	int err, rc;
 
@@ -1217,7 +1229,8 @@ sja1105_dynamic_config_wait_complete(struct sja1105_private *priv,
 				rc, rc != -EAGAIN,
 				SJA1105_DYNAMIC_CONFIG_SLEEP_US,
 				SJA1105_DYNAMIC_CONFIG_TIMEOUT_US,
-				false, priv, cmd, ops);
+				false, priv, ops, entry, check_valident,
+				check_errors);
 	return err < 0 ? err : rc;
 }
 
@@ -1287,25 +1300,14 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
 	mutex_lock(&priv->dynamic_config_lock);
 	rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
 			      ops->packed_size);
-	if (rc < 0) {
-		mutex_unlock(&priv->dynamic_config_lock);
-		return rc;
-	}
-
-	rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
-	mutex_unlock(&priv->dynamic_config_lock);
 	if (rc < 0)
-		return rc;
+		goto out;
 
-	if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
-		return -ENOENT;
+	rc = sja1105_dynamic_config_wait_complete(priv, ops, entry, true, false);
+out:
+	mutex_unlock(&priv->dynamic_config_lock);
 
-	/* Don't dereference possibly NULL pointer - maybe caller
-	 * only wanted to see whether the entry existed or not.
-	 */
-	if (entry)
-		ops->entry_packing(packed_buf, entry, UNPACK);
-	return 0;
+	return rc;
 }
 
 int sja1105_dynamic_config_write(struct sja1105_private *priv,
@@ -1357,22 +1359,14 @@ int sja1105_dynamic_config_write(struct sja1105_private *priv,
 	mutex_lock(&priv->dynamic_config_lock);
 	rc = sja1105_xfer_buf(priv, SPI_WRITE, ops->addr, packed_buf,
 			      ops->packed_size);
-	if (rc < 0) {
-		mutex_unlock(&priv->dynamic_config_lock);
-		return rc;
-	}
-
-	rc = sja1105_dynamic_config_wait_complete(priv, &cmd, ops);
-	mutex_unlock(&priv->dynamic_config_lock);
 	if (rc < 0)
-		return rc;
+		goto out;
 
-	cmd = (struct sja1105_dyn_cmd) {0};
-	ops->cmd_packing(packed_buf, &cmd, UNPACK);
-	if (cmd.errors)
-		return -EINVAL;
+	rc = sja1105_dynamic_config_wait_complete(priv, ops, NULL, false, true);
+out:
+	mutex_unlock(&priv->dynamic_config_lock);
 
-	return 0;
+	return rc;
 }
 
 static u8 sja1105_crc8_add(u8 crc, u8 byte, u8 poly)
-- 
cgit v1.2.3


From ea32690daf4fa525dc5a4d164bd00ed8c756e1c6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Fri, 8 Sep 2023 16:33:51 +0300
Subject: net: dsa: sja1105: serialize sja1105_port_mcast_flood() with other
 FDB accesses

sja1105_fdb_add() runs from the dsa_owq, and sja1105_port_mcast_flood()
runs from switchdev_deferred_process_work(). Prior to the blamed commit,
they used to be indirectly serialized through the rtnl_lock(), which
no longer holds true because dsa_owq dropped that.

So, it is now possible that we traverse the static config BLK_IDX_L2_LOOKUP
elements concurrently compared to when we change them, in
sja1105_static_fdb_change(). That is not ideal, since it might result in
data corruption.

Introduce a mutex which serializes accesses to the hardware FDB and to
the static config elements for the L2 Address Lookup table.

I can't find a good reason to add locking around sja1105_fdb_dump().
I'll add it later if needed.

Fixes: 0faf890fc519 ("net: dsa: drop rtnl_lock from dsa_slave_switchdev_event_work")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  2 ++
 drivers/net/dsa/sja1105/sja1105_main.c | 56 ++++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 0617d5ccd3ff..8c66d3bf61f0 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -266,6 +266,8 @@ struct sja1105_private {
 	 * the switch doesn't confuse them with one another.
 	 */
 	struct mutex mgmt_lock;
+	/* Serializes accesses to the FDB */
+	struct mutex fdb_lock;
 	/* PTP two-step TX timestamp ID, and its serialization lock */
 	spinlock_t ts_id_lock;
 	u8 ts_id;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 11c917d5ce43..cefd72617af4 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1798,6 +1798,7 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
 			   struct dsa_db db)
 {
 	struct sja1105_private *priv = ds->priv;
+	int rc;
 
 	if (!vid) {
 		switch (db.type) {
@@ -1812,12 +1813,16 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
 		}
 	}
 
-	return priv->info->fdb_add_cmd(ds, port, addr, vid);
+	mutex_lock(&priv->fdb_lock);
+	rc = priv->info->fdb_add_cmd(ds, port, addr, vid);
+	mutex_unlock(&priv->fdb_lock);
+
+	return rc;
 }
 
-static int sja1105_fdb_del(struct dsa_switch *ds, int port,
-			   const unsigned char *addr, u16 vid,
-			   struct dsa_db db)
+static int __sja1105_fdb_del(struct dsa_switch *ds, int port,
+			     const unsigned char *addr, u16 vid,
+			     struct dsa_db db)
 {
 	struct sja1105_private *priv = ds->priv;
 
@@ -1837,6 +1842,20 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port,
 	return priv->info->fdb_del_cmd(ds, port, addr, vid);
 }
 
+static int sja1105_fdb_del(struct dsa_switch *ds, int port,
+			   const unsigned char *addr, u16 vid,
+			   struct dsa_db db)
+{
+	struct sja1105_private *priv = ds->priv;
+	int rc;
+
+	mutex_lock(&priv->fdb_lock);
+	rc = __sja1105_fdb_del(ds, port, addr, vid, db);
+	mutex_unlock(&priv->fdb_lock);
+
+	return rc;
+}
+
 static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
 			    dsa_fdb_dump_cb_t *cb, void *data)
 {
@@ -1899,6 +1918,8 @@ static void sja1105_fast_age(struct dsa_switch *ds, int port)
 	};
 	int i;
 
+	mutex_lock(&priv->fdb_lock);
+
 	for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
 		struct sja1105_l2_lookup_entry l2_lookup = {0};
 		u8 macaddr[ETH_ALEN];
@@ -1912,7 +1933,7 @@ static void sja1105_fast_age(struct dsa_switch *ds, int port)
 		if (rc) {
 			dev_err(ds->dev, "Failed to read FDB: %pe\n",
 				ERR_PTR(rc));
-			return;
+			break;
 		}
 
 		if (!(l2_lookup.destports & BIT(port)))
@@ -1924,14 +1945,16 @@ static void sja1105_fast_age(struct dsa_switch *ds, int port)
 
 		u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
-		rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid, db);
+		rc = __sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid, db);
 		if (rc) {
 			dev_err(ds->dev,
 				"Failed to delete FDB entry %pM vid %lld: %pe\n",
 				macaddr, l2_lookup.vlanid, ERR_PTR(rc));
-			return;
+			break;
 		}
 	}
+
+	mutex_unlock(&priv->fdb_lock);
 }
 
 static int sja1105_mdb_add(struct dsa_switch *ds, int port,
@@ -2955,7 +2978,9 @@ static int sja1105_port_mcast_flood(struct sja1105_private *priv, int to,
 {
 	struct sja1105_l2_lookup_entry *l2_lookup;
 	struct sja1105_table *table;
-	int match;
+	int match, rc;
+
+	mutex_lock(&priv->fdb_lock);
 
 	table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
 	l2_lookup = table->entries;
@@ -2968,7 +2993,8 @@ static int sja1105_port_mcast_flood(struct sja1105_private *priv, int to,
 	if (match == table->entry_count) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Could not find FDB entry for unknown multicast");
-		return -ENOSPC;
+		rc = -ENOSPC;
+		goto out;
 	}
 
 	if (flags.val & BR_MCAST_FLOOD)
@@ -2976,10 +3002,13 @@ static int sja1105_port_mcast_flood(struct sja1105_private *priv, int to,
 	else
 		l2_lookup[match].destports &= ~BIT(to);
 
-	return sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
-					    l2_lookup[match].index,
-					    &l2_lookup[match],
-					    true);
+	rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+					  l2_lookup[match].index,
+					  &l2_lookup[match], true);
+out:
+	mutex_unlock(&priv->fdb_lock);
+
+	return rc;
 }
 
 static int sja1105_port_pre_bridge_flags(struct dsa_switch *ds, int port,
@@ -3349,6 +3378,7 @@ static int sja1105_probe(struct spi_device *spi)
 	mutex_init(&priv->ptp_data.lock);
 	mutex_init(&priv->dynamic_config_lock);
 	mutex_init(&priv->mgmt_lock);
+	mutex_init(&priv->fdb_lock);
 	spin_lock_init(&priv->ts_id_lock);
 
 	rc = sja1105_parse_dt(priv);
-- 
cgit v1.2.3


From 86899e9e1e29e854b5f6dcc24ba4f75f792c89aa Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Fri, 8 Sep 2023 16:33:52 +0300
Subject: net: dsa: sja1105: block FDB accesses that are concurrent with a
 switch reset

Currently, when we add the first sja1105 port to a bridge with
vlan_filtering 1, then we sometimes see this output:

sja1105 spi2.2: port 4 failed to read back entry for be:79:b4:9e:9e:96 vid 3088: -ENOENT
sja1105 spi2.2: Reset switch and programmed static config. Reason: VLAN filtering
sja1105 spi2.2: port 0 failed to add be:79:b4:9e:9e:96 vid 0 to fdb: -2

It is because sja1105_fdb_add() runs from the dsa_owq which is no longer
serialized with switch resets since it dropped the rtnl_lock() in the
blamed commit.

Either performing the FDB accesses before the reset, or after the reset,
is equally fine, because sja1105_static_fdb_change() backs up those
changes in the static config, but FDB access during reset isn't ok.

Make sja1105_static_config_reload() take the fdb_lock to fix that.

Fixes: 0faf890fc519 ("net: dsa: drop rtnl_lock from dsa_slave_switchdev_event_work")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index cefd72617af4..1a367e64bc3b 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2297,6 +2297,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	int rc, i;
 	s64 now;
 
+	mutex_lock(&priv->fdb_lock);
 	mutex_lock(&priv->mgmt_lock);
 
 	mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
@@ -2409,6 +2410,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 		goto out;
 out:
 	mutex_unlock(&priv->mgmt_lock);
+	mutex_unlock(&priv->fdb_lock);
 
 	return rc;
 }
-- 
cgit v1.2.3


From a7b8d60b37237680009dd0b025fe8c067aba0ee3 Mon Sep 17 00:00:00 2001
From: Hayes Wang
Date: Fri, 8 Sep 2023 15:01:52 +0800
Subject: r8152: check budget for r8152_poll()

According to the document of napi, there is no rx process when the
budget is 0. Therefore, r8152_poll() has to return 0 directly when the
budget is equal to 0.

Fixes: d2187f8e4454 ("r8152: divide the tx and rx bottom functions")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 332c853ca99b..0c13d9950cd8 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -2636,6 +2636,9 @@ static int r8152_poll(struct napi_struct *napi, int budget)
 	struct r8152 *tp = container_of(napi, struct r8152, napi);
 	int work_done;
 
+	if (!budget)
+		return 0;
+
 	work_done = rx_bottom(tp, budget);
 
 	if (work_done < budget) {
-- 
cgit v1.2.3


From c821a88bd720b0046433173185fd841a100d44ad Mon Sep 17 00:00:00 2001
From: Shigeru Yoshida
Date: Sun, 10 Sep 2023 02:03:10 +0900
Subject: kcm: Fix memory leak in error path of kcm_sendmsg()

syzbot reported a memory leak like below:

BUG: memory leak
unreferenced object 0xffff88810b088c00 (size 240):
  comm "syz-executor186", pid 5012, jiffies 4294943306 (age 13.680s)
  hex dump (first 32 bytes):
    00 89 08 0b 81 88 ff ff 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff83e5d5ff>] __alloc_skb+0x1ef/0x230 net/core/skbuff.c:634
    [<ffffffff84606e59>] alloc_skb include/linux/skbuff.h:1289 [inline]
    [<ffffffff84606e59>] kcm_sendmsg+0x269/0x1050 net/kcm/kcmsock.c:815
    [<ffffffff83e479c6>] sock_sendmsg_nosec net/socket.c:725 [inline]
    [<ffffffff83e479c6>] sock_sendmsg+0x56/0xb0 net/socket.c:748
    [<ffffffff83e47f55>] ____sys_sendmsg+0x365/0x470 net/socket.c:2494
    [<ffffffff83e4c389>] ___sys_sendmsg+0xc9/0x130 net/socket.c:2548
    [<ffffffff83e4c536>] __sys_sendmsg+0xa6/0x120 net/socket.c:2577
    [<ffffffff84ad7bb8>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
    [<ffffffff84ad7bb8>] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
    [<ffffffff84c0008b>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

In kcm_sendmsg(), kcm_tx_msg(head)->last_skb is used as a cursor to append
newly allocated skbs to 'head'. If some bytes are copied, an error occurred,
and jumped to out_error label, 'last_skb' is left unmodified. A later
kcm_sendmsg() will use an obsoleted 'last_skb' reference, corrupting the
'head' frag_list and causing the leak.

This patch fixes this issue by properly updating the last allocated skb in
'last_skb'.

Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
Reported-and-tested-by: syzbot+6f98de741f7dbbfc4ccb@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=6f98de741f7dbbfc4ccb
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 4580f61426bb..740539a218b7 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -939,6 +939,8 @@ out_error:
 
 	if (head != kcm->seq_skb)
 		kfree_skb(head);
+	else if (copied)
+		kcm_tx_msg(head)->last_skb = skb;
 
 	err = sk_stream_error(sk, msg->msg_flags, err);
 
-- 
cgit v1.2.3


From e10a35abb3da12b812cfb6fc6137926a0c81e39a Mon Sep 17 00:00:00 2001
From: Daniel Golle
Date: Sun, 10 Sep 2023 22:40:30 +0100
Subject: net: ethernet: mtk_eth_soc: fix uninitialized variable

Variable dma_addr in function mtk_poll_rx can be uninitialized on
some of the error paths. In practise this doesn't matter, even random
data present in uninitialized stack memory can safely be used in the
way it happens in the error path.

However, in order to make Smatch happy make sure the variable is
always initialized.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 2372ce8c2580..3cffd1bd3067 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2005,11 +2005,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 	u8 *data, *new_data;
 	struct mtk_rx_dma_v2 *rxd, trxd;
 	int done = 0, bytes = 0;
+	dma_addr_t dma_addr = DMA_MAPPING_ERROR;
 
 	while (done < budget) {
 		unsigned int pktlen, *rxdcsum;
 		struct net_device *netdev;
-		dma_addr_t dma_addr;
 		u32 hash, reason;
 		int mac = 0;
 
@@ -2186,7 +2186,8 @@ release_desc:
 		else
 			rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size);
 
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA))
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA) &&
+		    likely(dma_addr != DMA_MAPPING_ERROR))
 			rxd->rxd2 |= RX_DMA_PREP_ADDR64(dma_addr);
 
 		ring->calc_idx = idx;
-- 
cgit v1.2.3


From 5a124b1fd3e6cb15a943f0cdfe96aa8f6d3d2f39 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi
Date: Sat, 9 Sep 2023 20:41:56 +0200
Subject: net: ethernet: mtk_eth_soc: fix pse_port configuration for MT7988

MT7988 SoC support 3 NICs. Fix pse_port configuration in
mtk_flow_set_output_device routine if the traffic is offloaded to eth2.
Rely on mtk_pse_port definitions.

Fixes: 88efedf517e6 ("net: ethernet: mtk_eth_soc: enable nft hw flowtable_offload for MT7988 SoC")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index a70a5417c173..a4efbeb16208 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -214,9 +214,11 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
 	dsa_port = mtk_flow_get_dsa_port(&dev);
 
 	if (dev == eth->netdev[0])
-		pse_port = 1;
+		pse_port = PSE_GDM1_PORT;
 	else if (dev == eth->netdev[1])
-		pse_port = 2;
+		pse_port = PSE_GDM2_PORT;
+	else if (dev == eth->netdev[2])
+		pse_port = PSE_GDM3_PORT;
 	else
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From cfaa80c91f6f99b9342b6557f0f0e1143e434066 Mon Sep 17 00:00:00 2001
From: Liu Jian
Date: Sat, 9 Sep 2023 16:14:34 +0800
Subject: net/tls: do not free tls_rec on async operation in
 bpf_exec_tx_verdict()

I got the below warning when do fuzzing test:
BUG: KASAN: null-ptr-deref in scatterwalk_copychunks+0x320/0x470
Read of size 4 at addr 0000000000000008 by task kworker/u8:1/9

CPU: 0 PID: 9 Comm: kworker/u8:1 Tainted: G           OE
Hardware name: linux,dummy-virt (DT)
Workqueue: pencrypt_parallel padata_parallel_worker
Call trace:
 dump_backtrace+0x0/0x420
 show_stack+0x34/0x44
 dump_stack+0x1d0/0x248
 __kasan_report+0x138/0x140
 kasan_report+0x44/0x6c
 __asan_load4+0x94/0xd0
 scatterwalk_copychunks+0x320/0x470
 skcipher_next_slow+0x14c/0x290
 skcipher_walk_next+0x2fc/0x480
 skcipher_walk_first+0x9c/0x110
 skcipher_walk_aead_common+0x380/0x440
 skcipher_walk_aead_encrypt+0x54/0x70
 ccm_encrypt+0x13c/0x4d0
 crypto_aead_encrypt+0x7c/0xfc
 pcrypt_aead_enc+0x28/0x84
 padata_parallel_worker+0xd0/0x2dc
 process_one_work+0x49c/0xbdc
 worker_thread+0x124/0x880
 kthread+0x210/0x260
 ret_from_fork+0x10/0x18

This is because the value of rec_seq of tls_crypto_info configured by the
user program is too large, for example, 0xffffffffffffff. In addition, TLS
is asynchronously accelerated. When tls_do_encryption() returns
-EINPROGRESS and sk->sk_err is set to EBADMSG due to rec_seq overflow,
skmsg is released before the asynchronous encryption process ends. As a
result, the UAF problem occurs during the asynchronous processing of the
encryption module.

If the operation is asynchronous and the encryption module returns
EINPROGRESS, do not free the record information.

Fixes: 635d93981786 ("net/tls: free record only on encryption error")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/20230909081434.2324940-1-liujian56@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/tls/tls_sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1ed4a611631f..d1fc295b83b5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -817,7 +817,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 	psock = sk_psock_get(sk);
 	if (!psock || !policy) {
 		err = tls_push_record(sk, flags, record_type);
-		if (err && sk->sk_err == EBADMSG) {
+		if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
 			*copied -= sk_msg_free(sk, msg);
 			tls_free_open_rec(sk);
 			err = -sk->sk_err;
@@ -846,7 +846,7 @@ more_data:
 	switch (psock->eval) {
 	case __SK_PASS:
 		err = tls_push_record(sk, flags, record_type);
-		if (err && sk->sk_err == EBADMSG) {
+		if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
 			*copied -= sk_msg_free(sk, msg);
 			tls_free_open_rec(sk);
 			err = -sk->sk_err;
-- 
cgit v1.2.3


From 403f0e771457e2b8811dc280719d11b9bacf10f4 Mon Sep 17 00:00:00 2001
From: Sascha Hauer
Date: Fri, 8 Sep 2023 13:29:13 +0200
Subject: net: macb: fix sleep inside spinlock

macb_set_tx_clk() is called under a spinlock but itself calls clk_set_rate()
which can sleep. This results in:

| BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
| pps pps1: new PPS source ptp1
| in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 40, name: kworker/u4:3
| preempt_count: 1, expected: 0
| RCU nest depth: 0, expected: 0
| 4 locks held by kworker/u4:3/40:
|  #0: ffff000003409148
| macb ff0c0000.ethernet: gem-ptp-timer ptp clock registered.
|  ((wq_completion)events_power_efficient){+.+.}-{0:0}, at: process_one_work+0x14c/0x51c
|  #1: ffff8000833cbdd8 ((work_completion)(&pl->resolve)){+.+.}-{0:0}, at: process_one_work+0x14c/0x51c
|  #2: ffff000004f01578 (&pl->state_mutex){+.+.}-{4:4}, at: phylink_resolve+0x44/0x4e8
|  #3: ffff000004f06f50 (&bp->lock){....}-{3:3}, at: macb_mac_link_up+0x40/0x2ac
| irq event stamp: 113998
| hardirqs last  enabled at (113997): [<ffff800080e8503c>] _raw_spin_unlock_irq+0x30/0x64
| hardirqs last disabled at (113998): [<ffff800080e84478>] _raw_spin_lock_irqsave+0xac/0xc8
| softirqs last  enabled at (113608): [<ffff800080010630>] __do_softirq+0x430/0x4e4
| softirqs last disabled at (113597): [<ffff80008001614c>] ____do_softirq+0x10/0x1c
| CPU: 0 PID: 40 Comm: kworker/u4:3 Not tainted 6.5.0-11717-g9355ce8b2f50-dirty #368
| Hardware name: ... ZynqMP ... (DT)
| Workqueue: events_power_efficient phylink_resolve
| Call trace:
|  dump_backtrace+0x98/0xf0
|  show_stack+0x18/0x24
|  dump_stack_lvl+0x60/0xac
|  dump_stack+0x18/0x24
|  __might_resched+0x144/0x24c
|  __might_sleep+0x48/0x98
|  __mutex_lock+0x58/0x7b0
|  mutex_lock_nested+0x24/0x30
|  clk_prepare_lock+0x4c/0xa8
|  clk_set_rate+0x24/0x8c
|  macb_mac_link_up+0x25c/0x2ac
|  phylink_resolve+0x178/0x4e8
|  process_one_work+0x1ec/0x51c
|  worker_thread+0x1ec/0x3e4
|  kthread+0x120/0x124
|  ret_from_fork+0x10/0x20

The obvious fix is to move the call to macb_set_tx_clk() out of the
protected area. This seems safe as rx and tx are both disabled anyway at
this point.
It is however not entirely clear what the spinlock shall protect. It
could be the read-modify-write access to the NCFGR register, but this
is accessed in macb_set_rx_mode() and macb_set_rxcsum_feature() as well
without holding the spinlock. It could also be the register accesses
done in mog_init_rings() or macb_init_buffers(), but again these
functions are called without holding the spinlock in macb_hresp_error_task().
The locking seems fishy in this driver and it might deserve another look
before this patch is applied.

Fixes: 633e98a711ac0 ("net: macb: use resolved link config in mac_link_up()")
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20230908112913.1701766-1-s.hauer@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 31f664ee4d77..b940dcd3ace6 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -756,8 +756,6 @@ static void macb_mac_link_up(struct phylink_config *config,
 		if (rx_pause)
 			ctrl |= MACB_BIT(PAE);
 
-		macb_set_tx_clk(bp, speed);
-
 		/* Initialize rings & buffers as clearing MACB_BIT(TE) in link down
 		 * cleared the pipeline and control registers.
 		 */
@@ -777,6 +775,9 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	spin_unlock_irqrestore(&bp->lock, flags);
 
+	if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC))
+		macb_set_tx_clk(bp, speed);
+
 	/* Enable Rx and Tx; Enable PTP unicast */
 	ctrl = macb_readl(bp, NCR);
 	if (gem_has_ptp(bp))
-- 
cgit v1.2.3


From 7a6102aa6df0d5d032b4cbc51935d1d4cda17254 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen
Date: Mon, 11 Sep 2023 15:58:25 +0200
Subject: veth: Update XDP feature set when bringing up device

There's an early return in veth_set_features() if the device is in a down
state, which leads to the XDP feature flags not being updated when enabling
GRO while the device is down. Which in turn leads to XDP_REDIRECT not
working, because the redirect code now checks the flags.

Fix this by updating the feature flags after bringing the device up.

Before this patch:

NETDEV_XDP_ACT_BASIC:		yes
NETDEV_XDP_ACT_REDIRECT:	yes
NETDEV_XDP_ACT_NDO_XMIT:	no
NETDEV_XDP_ACT_XSK_ZEROCOPY:	no
NETDEV_XDP_ACT_HW_OFFLOAD:	no
NETDEV_XDP_ACT_RX_SG:		yes
NETDEV_XDP_ACT_NDO_XMIT_SG:	no

After this patch:

NETDEV_XDP_ACT_BASIC:		yes
NETDEV_XDP_ACT_REDIRECT:	yes
NETDEV_XDP_ACT_NDO_XMIT:	yes
NETDEV_XDP_ACT_XSK_ZEROCOPY:	no
NETDEV_XDP_ACT_HW_OFFLOAD:	no
NETDEV_XDP_ACT_RX_SG:		yes
NETDEV_XDP_ACT_NDO_XMIT_SG:	yes

Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag")
Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20230911135826.722295-1-toke@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/veth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9c6f4f83f22b..0deefd1573cf 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1446,6 +1446,8 @@ static int veth_open(struct net_device *dev)
 		netif_carrier_on(peer);
 	}
 
+	veth_set_xdp_features(dev);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8cdd9f1aaedf823006449faa4e540026c692ac43 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 11 Sep 2023 15:42:13 +0000
Subject: ipv6: fix ip6_sock_set_addr_preferences() typo

ip6_sock_set_addr_preferences() second argument should be an integer.

SUNRPC attempts to set IPV6_PREFER_SRC_PUBLIC were
translated to IPV6_PREFER_SRC_TMP

Fixes: 18d5ad623275 ("ipv6: add ip6_sock_set_addr_preferences")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230911154213.713941-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 0675be0f3fa0..fe274c122a56 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1360,7 +1360,7 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
 	return 0;
 }
 
-static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
 {
 	int ret;
 
-- 
cgit v1.2.3


From c6d277064b1da7f9015b575a562734de87a7e463 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:36:55 -0700
Subject: tcp: Factorise sk_family-independent comparison in
 inet_bind2_bucket_match(_addr_any).

This is a prep patch to make the following patches cleaner that touch
inet_bind2_bucket_match() and inet_bind2_bucket_match_addr_any().

Both functions have duplicated comparison for netns, port, and l3mdev.
Let's factorise them.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7876b7d703cb..5c54f2804174 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -815,41 +815,39 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
 				    const struct net *net, unsigned short port,
 				    int l3mdev, const struct sock *sk)
 {
+	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+	    tb->l3mdev != l3mdev)
+		return false;
+
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family != tb->family)
 		return false;
 
 	if (sk->sk_family == AF_INET6)
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev &&
-			ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
-	else
+		return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
 #endif
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+	return tb->rcv_saddr == sk->sk_rcv_saddr;
 }
 
 bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
 				      unsigned short port, int l3mdev, const struct sock *sk)
 {
+	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+	    tb->l3mdev != l3mdev)
+		return false;
+
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family != tb->family) {
 		if (sk->sk_family == AF_INET)
-			return net_eq(ib2_net(tb), net) && tb->port == port &&
-				tb->l3mdev == l3mdev &&
-				ipv6_addr_any(&tb->v6_rcv_saddr);
+			return ipv6_addr_any(&tb->v6_rcv_saddr);
 
 		return false;
 	}
 
 	if (sk->sk_family == AF_INET6)
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev &&
-			ipv6_addr_any(&tb->v6_rcv_saddr);
-	else
+		return ipv6_addr_any(&tb->v6_rcv_saddr);
 #endif
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+	return tb->rcv_saddr == 0;
 }
 
 /* The socket's bhash2 hashbucket spinlock must be held when this is called */
-- 
cgit v1.2.3


From aa99e5f87bd54db55dd37cb130bd5eb55933027f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:36:56 -0700
Subject: tcp: Fix bind() regression for v4-mapped-v6 wildcard address.

Andrei Vagin reported bind() regression with strace logs.

If we bind() a TCPv6 socket to ::FFFF:0.0.0.0 and then bind() a TCPv4
socket to 127.0.0.1, the 2nd bind() should fail but now succeeds.

  from socket import *

  s1 = socket(AF_INET6, SOCK_STREAM)
  s1.bind(('::ffff:0.0.0.0', 0))

  s2 = socket(AF_INET, SOCK_STREAM)
  s2.bind(('127.0.0.1', s1.getsockname()[1]))

During the 2nd bind(), if tb->family is AF_INET6 and sk->sk_family is
AF_INET in inet_bind2_bucket_match_addr_any(), we still need to check
if tb has the v4-mapped-v6 wildcard address.

The example above does not work after commit 5456262d2baa ("net: Fix
incorrect address comparison when searching for a bind2 bucket"), but
the blamed change is not the commit.

Before the commit, the leading zeros of ::FFFF:0.0.0.0 were treated
as 0.0.0.0, and the sequence above worked by chance.  Technically, this
case has been broken since bhash2 was introduced.

Note that if we bind() two sockets to 127.0.0.1 and then ::FFFF:0.0.0.0,
the 2nd bind() fails properly because we fall back to using bhash to
detect conflicts for the v4-mapped-v6 address.

Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
Reported-by: Andrei Vagin <avagin@google.com>
Closes: https://lore.kernel.org/netdev/ZPuYBOFC8zsK6r9T@google.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h         | 5 +++++
 net/ipv4/inet_hashtables.c | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index fe274c122a56..c6932d1a3fa8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -784,6 +784,11 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
 					cpu_to_be32(0x0000ffff))) == 0UL;
 }
 
+static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a)
+{
+	return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]);
+}
+
 static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a)
 {
 	return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5c54f2804174..a58b04052ca6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -839,7 +839,8 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family != tb->family) {
 		if (sk->sk_family == AF_INET)
-			return ipv6_addr_any(&tb->v6_rcv_saddr);
+			return ipv6_addr_any(&tb->v6_rcv_saddr) ||
+				ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr);
 
 		return false;
 	}
-- 
cgit v1.2.3


From c48ef9c4aed3632566b57ba66cec6ec78624d4cb Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:36:57 -0700
Subject: tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.

Since bhash2 was introduced, the example below does not work as expected.
These two bind() should conflict, but the 2nd bind() now succeeds.

  from socket import *

  s1 = socket(AF_INET6, SOCK_STREAM)
  s1.bind(('::ffff:127.0.0.1', 0))

  s2 = socket(AF_INET, SOCK_STREAM)
  s2.bind(('127.0.0.1', s1.getsockname()[1]))

During the 2nd bind() in inet_csk_get_port(), inet_bind2_bucket_find()
fails to find the 1st socket's tb2, so inet_bind2_bucket_create() allocates
a new tb2 for the 2nd socket.  Then, we call inet_csk_bind_conflict() that
checks conflicts in the new tb2 by inet_bhash2_conflict().  However, the
new tb2 does not include the 1st socket, thus the bind() finally succeeds.

In this case, inet_bind2_bucket_match() must check if AF_INET6 tb2 has
the conflicting v4-mapped-v6 address so that inet_bind2_bucket_find()
returns the 1st socket's tb2.

Note that if we bind two sockets to 127.0.0.1 and then ::FFFF:127.0.0.1,
the 2nd bind() fails properly for the same reason mentinoed in the previous
commit.

Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a58b04052ca6..c32f5e28758b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -820,8 +820,13 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
 		return false;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family != tb->family)
+	if (sk->sk_family != tb->family) {
+		if (sk->sk_family == AF_INET)
+			return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+				tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+
 		return false;
+	}
 
 	if (sk->sk_family == AF_INET6)
 		return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
-- 
cgit v1.2.3


From 0071d15517b4a3d265abc00395beb1138e7236c7 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:36:58 -0700
Subject: selftest: tcp: Fix address length in bind_wildcard.c.

The selftest passes the IPv6 address length for an IPv4 address.
We should pass the correct length.

Note inet_bind_sk() does not check if the size is larger than
sizeof(struct sockaddr_in), so there is no real bug in this
selftest.

Fixes: 13715acf8ab5 ("selftest: Add test for bind() conflicts.")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/bind_wildcard.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index 58edfc15d28b..e7ebe72e879d 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -100,7 +100,7 @@ void bind_sockets(struct __test_metadata *_metadata,
 TEST_F(bind_wildcard, v4_v6)
 {
 	bind_sockets(_metadata, self,
-		     (struct sockaddr *)&self->addr4, sizeof(self->addr6),
+		     (struct sockaddr *)&self->addr4, sizeof(self->addr4),
 		     (struct sockaddr *)&self->addr6, sizeof(self->addr6));
 }
 
-- 
cgit v1.2.3


From 2895d879dd41a588d80acde1aa832deb38d67823 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:36:59 -0700
Subject: selftest: tcp: Move expected_errno into each test case in
 bind_wildcard.c.

This is a preparation patch for the following patch.

Let's define expected_errno in each test case so that we can add other test
cases easily.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/bind_wildcard.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index e7ebe72e879d..81f694536099 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -10,37 +10,41 @@ FIXTURE(bind_wildcard)
 {
 	struct sockaddr_in addr4;
 	struct sockaddr_in6 addr6;
-	int expected_errno;
 };
 
 FIXTURE_VARIANT(bind_wildcard)
 {
 	const __u32 addr4_const;
 	const struct in6_addr *addr6_const;
+	int expected_errno;
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
 {
 	.addr4_const = INADDR_ANY,
 	.addr6_const = &in6addr_any,
+	.expected_errno = EADDRINUSE,
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
 {
 	.addr4_const = INADDR_ANY,
 	.addr6_const = &in6addr_loopback,
+	.expected_errno = 0,
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
 {
 	.addr4_const = INADDR_LOOPBACK,
 	.addr6_const = &in6addr_any,
+	.expected_errno = EADDRINUSE,
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
 {
 	.addr4_const = INADDR_LOOPBACK,
 	.addr6_const = &in6addr_loopback,
+	.expected_errno = 0,
 };
 
 FIXTURE_SETUP(bind_wildcard)
@@ -52,11 +56,6 @@ FIXTURE_SETUP(bind_wildcard)
 	self->addr6.sin6_family = AF_INET6;
 	self->addr6.sin6_port = htons(0);
 	self->addr6.sin6_addr = *variant->addr6_const;
-
-	if (variant->addr6_const == &in6addr_any)
-		self->expected_errno = EADDRINUSE;
-	else
-		self->expected_errno = 0;
 }
 
 FIXTURE_TEARDOWN(bind_wildcard)
@@ -65,6 +64,7 @@ FIXTURE_TEARDOWN(bind_wildcard)
 
 void bind_sockets(struct __test_metadata *_metadata,
 		  FIXTURE_DATA(bind_wildcard) *self,
+		  int expected_errno,
 		  struct sockaddr *addr1, socklen_t addrlen1,
 		  struct sockaddr *addr2, socklen_t addrlen2)
 {
@@ -86,9 +86,9 @@ void bind_sockets(struct __test_metadata *_metadata,
 	ASSERT_GT(fd[1], 0);
 
 	ret = bind(fd[1], addr2, addrlen2);
-	if (self->expected_errno) {
+	if (expected_errno) {
 		ASSERT_EQ(ret, -1);
-		ASSERT_EQ(errno, self->expected_errno);
+		ASSERT_EQ(errno, expected_errno);
 	} else {
 		ASSERT_EQ(ret, 0);
 	}
@@ -99,14 +99,14 @@ void bind_sockets(struct __test_metadata *_metadata,
 
 TEST_F(bind_wildcard, v4_v6)
 {
-	bind_sockets(_metadata, self,
+	bind_sockets(_metadata, self, variant->expected_errno,
 		     (struct sockaddr *)&self->addr4, sizeof(self->addr4),
 		     (struct sockaddr *)&self->addr6, sizeof(self->addr6));
 }
 
 TEST_F(bind_wildcard, v6_v4)
 {
-	bind_sockets(_metadata, self,
+	bind_sockets(_metadata, self, variant->expected_errno,
 		     (struct sockaddr *)&self->addr6, sizeof(self->addr6),
 		     (struct sockaddr *)&self->addr4, sizeof(self->addr4));
 }
-- 
cgit v1.2.3


From 8637d8e8b653f4c8b6fd277b434b118f844d1d77 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 11:37:00 -0700
Subject: selftest: tcp: Add v4-mapped-v6 cases in bind_wildcard.c.

We add these 8 test cases in bind_wildcard.c to check bind() conflicts.

  1st bind()          2nd bind()
  ---------           ---------
  0.0.0.0             ::FFFF:0.0.0.0
  ::FFFF:0.0.0.0      0.0.0.0
  0.0.0.0             ::FFFF:127.0.0.1
  ::FFFF:127.0.0.1    0.0.0.0
  127.0.0.1           ::FFFF:0.0.0.0
  ::FFFF:0.0.0.0      127.0.0.1
  127.0.0.1           ::FFFF:127.0.0.1
  ::FFFF:127.0.0.1    127.0.0.1

All test passed without bhash2 and with bhash2 and this series.

 Before bhash2:
  $ uname -r
  6.0.0-rc1-00393-g0bf73255d3a3
  $ ./bind_wildcard
  ...
  # PASSED: 16 / 16 tests passed.

 Just after bhash2:
  $ uname -r
  6.0.0-rc1-00394-g28044fc1d495
  $ ./bind_wildcard
  ...
  ok 15 bind_wildcard.v4_local_v6_v4mapped_local.v4_v6
  not ok 16 bind_wildcard.v4_local_v6_v4mapped_local.v6_v4
  # FAILED: 15 / 16 tests passed.

 On net.git:
  $ ./bind_wildcard
  ...
  not ok 14 bind_wildcard.v4_local_v6_v4mapped_any.v6_v4
  not ok 16 bind_wildcard.v4_local_v6_v4mapped_local.v6_v4
  # FAILED: 13 / 16 tests passed.

 With this series:
  $ ./bind_wildcard
  ...
  # PASSED: 16 / 16 tests passed.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/bind_wildcard.c | 46 +++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index 81f694536099..a2662348cdb1 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -6,6 +6,24 @@
 
 #include "../kselftest_harness.h"
 
+struct in6_addr in6addr_v4mapped_any = {
+	.s6_addr = {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 255, 255,
+		0, 0, 0, 0
+	}
+};
+
+struct in6_addr in6addr_v4mapped_loopback = {
+	.s6_addr = {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 255, 255,
+		127, 0, 0, 1
+	}
+};
+
 FIXTURE(bind_wildcard)
 {
 	struct sockaddr_in addr4;
@@ -33,6 +51,20 @@ FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
 	.expected_errno = 0,
 };
 
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_v4mapped_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_v4mapped_loopback,
+	.expected_errno = EADDRINUSE,
+};
+
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
 {
 	.addr4_const = INADDR_LOOPBACK,
@@ -47,6 +79,20 @@ FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
 	.expected_errno = 0,
 };
 
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_v4mapped_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_v4mapped_loopback,
+	.expected_errno = EADDRINUSE,
+};
+
 FIXTURE_SETUP(bind_wildcard)
 {
 	self->addr4.sin_family = AF_INET;
-- 
cgit v1.2.3


From 3c44191dd76cf9c0cc49adaf34384cbd42ef8ad2 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko
Date: Mon, 11 Sep 2023 13:28:14 -0700
Subject: ixgbe: fix timestamp configuration code

The commit in fixes introduced flags to control the status of hardware
configuration while processing packets. At the same time another structure
is used to provide configuration of timestamper to user-space applications.
The way it was coded makes this structures go out of sync easily. The
repro is easy for 82599 chips:

[root@hostname ~]# hwstamp_ctl -i eth0 -r 12 -t 1
current settings:
tx_type 0
rx_filter 0
new settings:
tx_type 1
rx_filter 12

The eth0 device is properly configured to timestamp any PTPv2 events.

[root@hostname ~]# hwstamp_ctl -i eth0 -r 1 -t 1
current settings:
tx_type 1
rx_filter 12
SIOCSHWTSTAMP failed: Numerical result out of range
The requested time stamping mode is not supported by the hardware.

The error is properly returned because HW doesn't support all packets
timestamping. But the adapter->flags is cleared of timestamp flags
even though no HW configuration was done. From that point no RX timestamps
are received by user-space application. But configuration shows good
values:

[root@hostname ~]# hwstamp_ctl -i eth0
current settings:
tx_type 1
rx_filter 12

Fix the issue by applying new flags only when the HW was actually
configured.

Fixes: a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices")
Signed-off-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 0310af851086..9339edbd9082 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -979,6 +979,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
 	u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
 	u32 tsync_rx_mtrl = PTP_EV_PORT << 16;
+	u32 aflags = adapter->flags;
 	bool is_l2 = false;
 	u32 regval;
 
@@ -996,20 +997,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	case HWTSTAMP_FILTER_NONE:
 		tsync_rx_ctl = 0;
 		tsync_rx_mtrl = 0;
-		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
-				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		aflags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
 		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG;
-		adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
-				   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
 		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
-		adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
-				   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
@@ -1023,8 +1024,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
 		is_l2 = true;
 		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-		adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
-				   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_NTP_ALL:
@@ -1035,7 +1036,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 		if (hw->mac.type >= ixgbe_mac_X550) {
 			tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL;
 			config->rx_filter = HWTSTAMP_FILTER_ALL;
-			adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+			aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
 			break;
 		}
 		fallthrough;
@@ -1046,8 +1047,6 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 		 * Delay_Req messages and hardware does not support
 		 * timestamping all packets => return error
 		 */
-		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
-				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		config->rx_filter = HWTSTAMP_FILTER_NONE;
 		return -ERANGE;
 	}
@@ -1079,8 +1078,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 			       IXGBE_TSYNCRXCTL_TYPE_ALL |
 			       IXGBE_TSYNCRXCTL_TSIP_UT_EN;
 		config->rx_filter = HWTSTAMP_FILTER_ALL;
-		adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
-		adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
+		aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+		aflags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
 		is_l2 = true;
 		break;
 	default:
@@ -1113,6 +1112,9 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 
 	IXGBE_WRITE_FLUSH(hw);
 
+	/* configure adapter flags only when HW is actually configured */
+	adapter->flags = aflags;
+
 	/* clear TX/RX time stamp registers, just to be sure */
 	ixgbe_ptp_clear_tx_timestamp(adapter);
 	IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
-- 
cgit v1.2.3


From bc6ed2fa24b14e40e1005488bbe11268ce7108fa Mon Sep 17 00:00:00 2001
From: Corinna Vinschen
Date: Mon, 11 Sep 2023 13:28:49 -0700
Subject: igb: clean up in all error paths when enabling SR-IOV

After commit 50f303496d92 ("igb: Enable SR-IOV after reinit"), removing
the igb module could hang or crash (depending on the machine) when the
module has been loaded with the max_vfs parameter set to some value != 0.

In case of one test machine with a dual port 82580, this hang occurred:

[  232.480687] igb 0000:41:00.1: removed PHC on enp65s0f1
[  233.093257] igb 0000:41:00.1: IOV Disabled
[  233.329969] pcieport 0000:40:01.0: AER: Multiple Uncorrected (Non-Fatal) err0
[  233.340302] igb 0000:41:00.0: PCIe Bus Error: severity=Uncorrected (Non-Fata)
[  233.352248] igb 0000:41:00.0:   device [8086:1516] error status/mask=00100000
[  233.361088] igb 0000:41:00.0:    [20] UnsupReq               (First)
[  233.368183] igb 0000:41:00.0: AER:   TLP Header: 40000001 0000040f cdbfc00c c
[  233.376846] igb 0000:41:00.1: PCIe Bus Error: severity=Uncorrected (Non-Fata)
[  233.388779] igb 0000:41:00.1:   device [8086:1516] error status/mask=00100000
[  233.397629] igb 0000:41:00.1:    [20] UnsupReq               (First)
[  233.404736] igb 0000:41:00.1: AER:   TLP Header: 40000001 0000040f cdbfc00c c
[  233.538214] pci 0000:41:00.1: AER: can't recover (no error_detected callback)
[  233.538401] igb 0000:41:00.0: removed PHC on enp65s0f0
[  233.546197] pcieport 0000:40:01.0: AER: device recovery failed
[  234.157244] igb 0000:41:00.0: IOV Disabled
[  371.619705] INFO: task irq/35-aerdrv:257 blocked for more than 122 seconds.
[  371.627489]       Not tainted 6.4.0-dirty #2
[  371.632257] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this.
[  371.641000] task:irq/35-aerdrv   state:D stack:0     pid:257   ppid:2      f0
[  371.650330] Call Trace:
[  371.653061]  <TASK>
[  371.655407]  __schedule+0x20e/0x660
[  371.659313]  schedule+0x5a/0xd0
[  371.662824]  schedule_preempt_disabled+0x11/0x20
[  371.667983]  __mutex_lock.constprop.0+0x372/0x6c0
[  371.673237]  ? __pfx_aer_root_reset+0x10/0x10
[  371.678105]  report_error_detected+0x25/0x1c0
[  371.682974]  ? __pfx_report_normal_detected+0x10/0x10
[  371.688618]  pci_walk_bus+0x72/0x90
[  371.692519]  pcie_do_recovery+0xb2/0x330
[  371.696899]  aer_process_err_devices+0x117/0x170
[  371.702055]  aer_isr+0x1c0/0x1e0
[  371.705661]  ? __set_cpus_allowed_ptr+0x54/0xa0
[  371.710723]  ? __pfx_irq_thread_fn+0x10/0x10
[  371.715496]  irq_thread_fn+0x20/0x60
[  371.719491]  irq_thread+0xe6/0x1b0
[  371.723291]  ? __pfx_irq_thread_dtor+0x10/0x10
[  371.728255]  ? __pfx_irq_thread+0x10/0x10
[  371.732731]  kthread+0xe2/0x110
[  371.736243]  ? __pfx_kthread+0x10/0x10
[  371.740430]  ret_from_fork+0x2c/0x50
[  371.744428]  </TASK>

The reproducer was a simple script:

  #!/bin/sh
  for i in `seq 1 5`; do
    modprobe -rv igb
    modprobe -v igb max_vfs=1
    sleep 1
    modprobe -rv igb
  done

It turned out that this could only be reproduce on 82580 (quad and
dual-port), but not on 82576, i350 and i210.  Further debugging showed
that igb_enable_sriov()'s call to pci_enable_sriov() is failing, because
dev->is_physfn is 0 on 82580.

Prior to commit 50f303496d92 ("igb: Enable SR-IOV after reinit"),
igb_enable_sriov() jumped into the "err_out" cleanup branch.  After this
commit it only returned the error code.

So the cleanup didn't take place, and the incorrect VF setup in the
igb_adapter structure fooled the igb driver into assuming that VFs have
been set up where no VF actually existed.

Fix this problem by cleaning up again if pci_enable_sriov() fails.

Fixes: 50f303496d92 ("igb: Enable SR-IOV after reinit")
Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 13ba9c74bd84..76b34cee1da3 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3827,8 +3827,11 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit)
 	}
 
 	/* only call pci_enable_sriov() if no VFs are allocated already */
-	if (!old_vfs)
+	if (!old_vfs) {
 		err = pci_enable_sriov(pdev, adapter->vfs_allocated_count);
+		if (err)
+			goto err_out;
+	}
 
 	goto out;
 
-- 
cgit v1.2.3


From e7b1ef29420fe52c2c1a273a9b4b36103a522625 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda
Date: Tue, 12 Sep 2023 10:49:35 +0900
Subject: net: renesas: rswitch: Fix unmasking irq condition

Fix unmasking irq condition by using napi_complete_done(). Otherwise,
redundant interrupts happen.

Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/renesas/rswitch.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 6083b1c8e4fb..26c8807d7dea 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -816,10 +816,10 @@ retry:
 
 	netif_wake_subqueue(ndev, 0);
 
-	napi_complete(napi);
-
-	rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true);
-	rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true);
+	if (napi_complete_done(napi, budget - quota)) {
+		rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true);
+		rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true);
+	}
 
 out:
 	return budget - quota;
-- 
cgit v1.2.3


From c4f922e86c8e0f7c5fe94e0547e9835fc9711f08 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda
Date: Tue, 12 Sep 2023 10:49:36 +0900
Subject: net: renesas: rswitch: Add spin lock protection for irq {un}mask

Add spin lock protection for irq {un}mask registers' control.

After napi_complete_done() and this protection were applied,
a lot of redundant interrupts no longer occur.

For example: when "iperf3 -c <ipaddr> -R" on R-Car S4-8 Spider
 Before the patches are applied: about 800,000 times happened
 After the patches were applied: about 100,000 times happened

Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/renesas/rswitch.c | 12 ++++++++++++
 drivers/net/ethernet/renesas/rswitch.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 26c8807d7dea..ea9186178091 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -799,6 +799,7 @@ static int rswitch_poll(struct napi_struct *napi, int budget)
 	struct net_device *ndev = napi->dev;
 	struct rswitch_private *priv;
 	struct rswitch_device *rdev;
+	unsigned long flags;
 	int quota = budget;
 
 	rdev = netdev_priv(ndev);
@@ -817,8 +818,10 @@ retry:
 	netif_wake_subqueue(ndev, 0);
 
 	if (napi_complete_done(napi, budget - quota)) {
+		spin_lock_irqsave(&priv->lock, flags);
 		rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true);
 		rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true);
+		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 out:
@@ -835,8 +838,10 @@ static void rswitch_queue_interrupt(struct net_device *ndev)
 	struct rswitch_device *rdev = netdev_priv(ndev);
 
 	if (napi_schedule_prep(&rdev->napi)) {
+		spin_lock(&rdev->priv->lock);
 		rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, false);
 		rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, false);
+		spin_unlock(&rdev->priv->lock);
 		__napi_schedule(&rdev->napi);
 	}
 }
@@ -1440,14 +1445,17 @@ static void rswitch_ether_port_deinit_all(struct rswitch_private *priv)
 static int rswitch_open(struct net_device *ndev)
 {
 	struct rswitch_device *rdev = netdev_priv(ndev);
+	unsigned long flags;
 
 	phy_start(ndev->phydev);
 
 	napi_enable(&rdev->napi);
 	netif_start_queue(ndev);
 
+	spin_lock_irqsave(&rdev->priv->lock, flags);
 	rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, true);
 	rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, true);
+	spin_unlock_irqrestore(&rdev->priv->lock, flags);
 
 	if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
 		iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE);
@@ -1461,6 +1469,7 @@ static int rswitch_stop(struct net_device *ndev)
 {
 	struct rswitch_device *rdev = netdev_priv(ndev);
 	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
+	unsigned long flags;
 
 	netif_tx_stop_all_queues(ndev);
 	bitmap_clear(rdev->priv->opened_ports, rdev->port, 1);
@@ -1476,8 +1485,10 @@ static int rswitch_stop(struct net_device *ndev)
 		kfree(ts_info);
 	}
 
+	spin_lock_irqsave(&rdev->priv->lock, flags);
 	rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, false);
 	rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, false);
+	spin_unlock_irqrestore(&rdev->priv->lock, flags);
 
 	phy_stop(ndev->phydev);
 	napi_disable(&rdev->napi);
@@ -1887,6 +1898,7 @@ static int renesas_eth_sw_probe(struct platform_device *pdev)
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	spin_lock_init(&priv->lock);
 
 	attr = soc_device_match(rswitch_soc_no_speed_change);
 	if (attr)
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index 54f397effbc6..f0c16a37ea55 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -1011,6 +1011,8 @@ struct rswitch_private {
 	struct rswitch_etha etha[RSWITCH_NUM_PORTS];
 	struct rswitch_mfwd mfwd;
 
+	spinlock_t lock;	/* lock interrupt registers' control */
+
 	bool etha_no_runtime_change;
 	bool gwca_halt;
 };
-- 
cgit v1.2.3


From a22730b1b4bf437c6bbfdeff5feddf54be4aeada Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima
Date: Mon, 11 Sep 2023 19:27:53 -0700
Subject: kcm: Fix error handling for SOCK_DGRAM in kcm_sendmsg().

syzkaller found a memory leak in kcm_sendmsg(), and commit c821a88bd720
("kcm: Fix memory leak in error path of kcm_sendmsg()") suppressed it by
updating kcm_tx_msg(head)->last_skb if partial data is copied so that the
following sendmsg() will resume from the skb.

However, we cannot know how many bytes were copied when we get the error.
Thus, we could mess up the MSG_MORE queue.

When kcm_sendmsg() fails for SOCK_DGRAM, we should purge the queue as we
do so for UDP by udp_flush_pending_frames().

Even without this change, when the error occurred, the following sendmsg()
resumed from a wrong skb and the queue was messed up.  However, we have
yet to get such a report, and only syzkaller stumbled on it.  So, this
can be changed safely.

Note this does not change SOCK_SEQPACKET behaviour.

Fixes: c821a88bd720 ("kcm: Fix memory leak in error path of kcm_sendmsg()")
Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230912022753.33327-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/kcm/kcmsock.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 740539a218b7..dd1d8ffd5f59 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -930,17 +930,18 @@ partial_message:
 out_error:
 	kcm_push(kcm);
 
-	if (copied && sock->type == SOCK_SEQPACKET) {
+	if (sock->type == SOCK_SEQPACKET) {
 		/* Wrote some bytes before encountering an
 		 * error, return partial success.
 		 */
-		goto partial_message;
-	}
-
-	if (head != kcm->seq_skb)
+		if (copied)
+			goto partial_message;
+		if (head != kcm->seq_skb)
+			kfree_skb(head);
+	} else {
 		kfree_skb(head);
-	else if (copied)
-		kcm_tx_msg(head)->last_skb = skb;
+		kcm->seq_skb = NULL;
+	}
 
 	err = sk_stream_error(sk, msg->msg_flags, err);
 
-- 
cgit v1.2.3