From a568814a55a0e82bbc7c7b51333d0c38e8fb5520 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:39:21 +0300 Subject: RDMA/siw: Properly check send and receive CQ pointers The check for the NULL of pointer received from container_of() is incorrect by definition as it points to some offset from NULL. Change such check with proper NULL check of SIW QP attributes. Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://lore.kernel.org/r/a7535a82925f6f4c1f062abaa294f3ae6e54bdd2.1620560310.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index d2313efb26db..917c8a919f38 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -300,7 +300,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; @@ -343,10 +342,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, rv = -EINVAL; goto err_out; } - scq = to_siw_cq(attrs->send_cq); - rcq = to_siw_cq(attrs->recv_cq); - if (!scq || (!rcq && !attrs->srq)) { + if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; goto err_out; @@ -401,8 +398,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, } } qp->pd = pd; - qp->scq = scq; - qp->rcq = rcq; + qp->scq = to_siw_cq(attrs->send_cq); + qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* -- cgit v1.2.3 From a3d83276d98886879b5bf7b30b7c29882754e4df Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:41:38 +0300 Subject: RDMA/siw: Release xarray entry The xarray entry is allocated in siw_qp_add(), but release was missed in case zero-sized SQ was discovered. Fixes: 661f385961f0 ("RDMA/siw: Fix handling of zero-sized Read and Receive Queues.") Link: https://lore.kernel.org/r/f070b59d5a1114d5a4e830346755c2b3f141cde5.1620560472.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 917c8a919f38..3f175f220a22 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -375,7 +375,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, else { /* Zero sized SQ is not supported */ rv = -EINVAL; - goto err_out; + goto err_out_xa; } if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); -- cgit v1.2.3 From 54d87913f147a983589923c7f651f97de9af5be1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 10 May 2021 17:46:00 +0300 Subject: RDMA/core: Prevent divide-by-zero error triggered by the user The user_entry_size is supplied by the user and later used as a denominator to calculate number of entries. The zero supplied by the user will trigger the following divide-by-zero error: divide error: 0000 [#1] SMP KASAN PTI CPU: 4 PID: 497 Comm: c_repro Not tainted 5.13.0-rc1+ #281 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE+0x1b1/0x510 Code: 87 59 03 00 00 e8 9f ab 1e ff 48 8d bd a8 00 00 00 e8 d3 70 41 ff 44 0f b7 b5 a8 00 00 00 e8 86 ab 1e ff 31 d2 4c 89 f0 31 ff <49> f7 f5 48 89 d6 48 89 54 24 10 48 89 04 24 e8 1b ad 1e ff 48 8b RSP: 0018:ffff88810416f828 EFLAGS: 00010246 RAX: 0000000000000008 RBX: 1ffff1102082df09 RCX: ffffffff82183f3d RDX: 0000000000000000 RSI: ffff888105f2da00 RDI: 0000000000000000 RBP: ffff88810416fa98 R08: 0000000000000001 R09: ffffed102082df5f R10: ffff88810416faf7 R11: ffffed102082df5e R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88810416faf0 FS: 00007f5715efa740(0000) GS:ffff88811a700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000840 CR3: 000000010c2e0001 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ib_uverbs_handler_UVERBS_METHOD_INFO_HANDLES+0x4b0/0x4b0 ib_uverbs_cmd_verbs+0x1546/0x1940 ib_uverbs_ioctl+0x186/0x240 __x64_sys_ioctl+0x38a/0x1220 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/b971cc70a8b240a8b5eda33c99fa0558a0071be2.1620657876.git.leonro@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 9ec6971056fa..a03021d94e11 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -331,6 +331,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( if (ret) return ret; + if (!user_entry_size) + return -EINVAL; + max_entries = uverbs_attr_ptr_get_array_size( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); -- cgit v1.2.3 From 67f29896fdc83298eed5a6576ff8f9873f709228 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 May 2021 10:26:03 +0300 Subject: RDMA/rxe: Clear all QP fields if creation failed rxe_qp_do_cleanup() relies on valid pointer values in QP for the properly created ones, but in case rxe_qp_from_init() failed it was filled with garbage and caused tot the following error. refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 12560 at lib/refcount.c:28 refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Modules linked in: CPU: 1 PID: 12560 Comm: syz-executor.4 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Code: e9 db fe ff ff 48 89 df e8 2c c2 ea fd e9 8a fe ff ff e8 72 6a a7 fd 48 c7 c7 e0 b2 c1 89 c6 05 dc 3a e6 09 01 e8 ee 74 fb 04 <0f> 0b e9 af fe ff ff 0f 1f 84 00 00 00 00 00 41 56 41 55 41 54 55 RSP: 0018:ffffc900097ceba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815bb075 RDI: fffff520012f9d67 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815b4eae R11: 0000000000000000 R12: ffff8880322a4800 R13: ffff8880322a4940 R14: ffff888033044e00 R15: 0000000000000000 FS: 00007f6eb2be3700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdbe5d41000 CR3: 000000001d181000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] kref_put include/linux/kref.h:64 [inline] rxe_qp_do_cleanup+0x96f/0xaf0 drivers/infiniband/sw/rxe/rxe_qp.c:805 execute_in_process_context+0x37/0x150 kernel/workqueue.c:3327 rxe_elem_release+0x9f/0x180 drivers/infiniband/sw/rxe/rxe_pool.c:391 kref_put include/linux/kref.h:65 [inline] rxe_create_qp+0x2cd/0x310 drivers/infiniband/sw/rxe/rxe_verbs.c:425 _ib_create_qp drivers/infiniband/core/core_priv.h:331 [inline] ib_create_named_qp+0x2ad/0x1370 drivers/infiniband/core/verbs.c:1231 ib_create_qp include/rdma/ib_verbs.h:3644 [inline] create_mad_qp+0x177/0x2d0 drivers/infiniband/core/mad.c:2920 ib_mad_port_open drivers/infiniband/core/mad.c:3001 [inline] ib_mad_init_device+0xd6f/0x1400 drivers/infiniband/core/mad.c:3092 add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:717 enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1331 ib_register_device drivers/infiniband/core/device.c:1413 [inline] ib_register_device+0x7c7/0xa50 drivers/infiniband/core/device.c:1365 rxe_register_device+0x3d5/0x4a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1147 rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247 rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:503 rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline] rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250 nldev_newlink+0x30e/0x550 drivers/infiniband/core/nldev.c:1555 rdma_nl_rcv_msg+0x36d/0x690 drivers/infiniband/core/netlink.c:195 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0x2ee/0x430 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/7bf8d548764d406dbbbaf4b574960ebfd5af8387.1620717918.git.leonro@nvidia.com Reported-by: syzbot+36a7f280de4e11c6f04e@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 34ae957a315c..b0f350d674fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -242,6 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->sq.queue->buf); kfree(qp->sq.queue); + qp->sq.queue = NULL; return err; } @@ -295,6 +296,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->rq.queue->buf); kfree(qp->rq.queue); + qp->rq.queue = NULL; return err; } } @@ -355,6 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, err2: rxe_queue_cleanup(qp->sq.queue); err1: + qp->pd = NULL; + qp->rcq = NULL; + qp->scq = NULL; + qp->srq = NULL; + if (srq) rxe_drop_ref(srq); rxe_drop_ref(scq); -- cgit v1.2.3 From 6863b4d7bf19a54e23fc5838b7e66d954444289d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 11 May 2021 08:48:27 +0300 Subject: RDMA/mlx5: Verify that DM operation is reasonable Fix the complaint from smatch by verifing that the user requested DM operation is not greater than 31. divers/infiniband/hw/mlx5/dm.c:220 mlx5_ib_handler_MLX5_IB_METHOD_DM_MAP_OP_ADDR() error: undefined (user controlled) shift '(((1))) << op' Fixes: cea85fa5dbc2 ("RDMA/mlx5: Add support in MEMIC operations") Link: https://lore.kernel.org/r/458b1d7710c3cf01360c8771893f483665569786.1620711734.git.leonro@nvidia.com Reported-by: Dan Carpenter Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/dm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index 094bf85589db..001d766cf291 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -217,6 +217,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)( if (err) return err; + if (op >= BITS_PER_TYPE(u32)) + return -EOPNOTSUPP; + if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op))) return -EOPNOTSUPP; -- cgit v1.2.3 From 97f30d324ce6645a4de4ffb71e4ae9b8ca36ff04 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 11 May 2021 08:48:29 +0300 Subject: RDMA/mlx5: Recover from fatal event in dual port mode When there is fatal event on the slave port, the device is marked as not active. We need to mark it as active again when the slave is recovered to regain full functionality. Fixes: d69a24e03659 ("IB/mlx5: Move IB event processing onto a workqueue") Link: https://lore.kernel.org/r/8906754455bb23019ef223c725d2c0d38acfb80b.1620711734.git.leonro@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6d1dd09a4388..644d5d0ac544 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4419,6 +4419,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, if (bound) { rdma_roce_rescan_device(&dev->ib_dev); + mpi->ibdev->ib_active = true; break; } } -- cgit v1.2.3 From dc07628bd2bbc1da768e265192c28ebd301f509d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 May 2021 08:48:31 +0300 Subject: RDMA/rxe: Return CQE error if invalid lkey was supplied RXE is missing update of WQE status in LOCAL_WRITE failures. This caused the following kernel panic if someone sent an atomic operation with an explicitly wrong lkey. [leonro@vm ~]$ mkt test test_atomic_invalid_lkey (tests.test_atomic.AtomicTest) ... WARNING: CPU: 5 PID: 263 at drivers/infiniband/sw/rxe/rxe_comp.c:740 rxe_completer+0x1a6d/0x2e30 [rdma_rxe] Modules linked in: crc32_generic rdma_rxe ip6_udp_tunnel udp_tunnel rdma_ucm rdma_cm ib_umad ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core ptp pps_core CPU: 5 PID: 263 Comm: python3 Not tainted 5.13.0-rc1+ #2936 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:rxe_completer+0x1a6d/0x2e30 [rdma_rxe] Code: 03 0f 8e 65 0e 00 00 3b 93 10 06 00 00 0f 84 82 0a 00 00 4c 89 ff 4c 89 44 24 38 e8 2d 74 a9 e1 4c 8b 44 24 38 e9 1c f5 ff ff <0f> 0b e9 0c e8 ff ff b8 05 00 00 00 41 bf 05 00 00 00 e9 ab e7 ff RSP: 0018:ffff8880158af090 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888016a78000 RCX: ffffffffa0cf1652 RDX: 1ffff9200004b442 RSI: 0000000000000004 RDI: ffffc9000025a210 RBP: dffffc0000000000 R08: 00000000ffffffea R09: ffff88801617740b R10: ffffed1002c2ee81 R11: 0000000000000007 R12: ffff88800f3b63e8 R13: ffff888016a78008 R14: ffffc9000025a180 R15: 000000000000000c FS: 00007f88b622a740(0000) GS:ffff88806d540000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f88b5a1fa10 CR3: 000000000d848004 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rxe_do_task+0x130/0x230 [rdma_rxe] rxe_rcv+0xb11/0x1df0 [rdma_rxe] rxe_loopback+0x157/0x1e0 [rdma_rxe] rxe_responder+0x5532/0x7620 [rdma_rxe] rxe_do_task+0x130/0x230 [rdma_rxe] rxe_rcv+0x9c8/0x1df0 [rdma_rxe] rxe_loopback+0x157/0x1e0 [rdma_rxe] rxe_requester+0x1efd/0x58c0 [rdma_rxe] rxe_do_task+0x130/0x230 [rdma_rxe] rxe_post_send+0x998/0x1860 [rdma_rxe] ib_uverbs_post_send+0xd5f/0x1220 [ib_uverbs] ib_uverbs_write+0x847/0xc80 [ib_uverbs] vfs_write+0x1c5/0x840 ksys_write+0x176/0x1d0 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/11e7b553f3a6f5371c6bb3f57c494bb52b88af99.1620711734.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Acked-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 2af26737d32d..a6712e373eed 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -346,13 +346,15 @@ static inline enum comp_state do_read(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, payload_addr(pkt), payload_size(pkt), to_mr_obj, NULL); - if (ret) + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; + } if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) return COMPST_COMP_ACK; - else - return COMPST_UPDATE_COMP; + + return COMPST_UPDATE_COMP; } static inline enum comp_state do_atomic(struct rxe_qp *qp, @@ -366,10 +368,12 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, &atomic_orig, sizeof(u64), to_mr_obj, NULL); - if (ret) + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; - else - return COMPST_COMP_ACK; + } + + return COMPST_COMP_ACK; } static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, -- cgit v1.2.3 From 889d916b6f8a48b8c9489fffcad3b78eedd01a51 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 11 May 2021 08:48:28 +0300 Subject: RDMA/core: Don't access cm_id after its destruction restrack should only be attached to a cm_id while the ID has a valid device pointer. It is set up when the device is first loaded, but not cleared when the device is removed. There is also two copies of the device pointer, one private and one in the public API, and these were left out of sync. Make everything go to NULL together and manipulate restrack right around the device assignments. Found by syzcaller: BUG: KASAN: wild-memory-access in __list_del include/linux/list.h:112 [inline] BUG: KASAN: wild-memory-access in __list_del_entry include/linux/list.h:135 [inline] BUG: KASAN: wild-memory-access in list_del include/linux/list.h:146 [inline] BUG: KASAN: wild-memory-access in cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline] BUG: KASAN: wild-memory-access in cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline] BUG: KASAN: wild-memory-access in cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783 Write of size 8 at addr dead000000000108 by task syz-executor716/334 CPU: 0 PID: 334 Comm: syz-executor716 Not tainted 5.11.0+ #271 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xbe/0xf9 lib/dump_stack.c:120 __kasan_report mm/kasan/report.c:400 [inline] kasan_report.cold+0x5f/0xd5 mm/kasan/report.c:413 __list_del include/linux/list.h:112 [inline] __list_del_entry include/linux/list.h:135 [inline] list_del include/linux/list.h:146 [inline] cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline] cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline] cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783 _destroy_id+0x29/0x460 drivers/infiniband/core/cma.c:1862 ucma_close_id+0x36/0x50 drivers/infiniband/core/ucma.c:185 ucma_destroy_private_ctx+0x58d/0x5b0 drivers/infiniband/core/ucma.c:576 ucma_close+0x91/0xd0 drivers/infiniband/core/ucma.c:1797 __fput+0x169/0x540 fs/file_table.c:280 task_work_run+0xb7/0x100 kernel/task_work.c:140 exit_task_work include/linux/task_work.h:30 [inline] do_exit+0x7da/0x17f0 kernel/exit.c:825 do_group_exit+0x9e/0x190 kernel/exit.c:922 __do_sys_exit_group kernel/exit.c:933 [inline] __se_sys_exit_group kernel/exit.c:931 [inline] __x64_sys_exit_group+0x2d/0x30 kernel/exit.c:931 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 255d0c14b375 ("RDMA/cma: rdma_bind_addr() leaks a cma_dev reference count") Link: https://lore.kernel.org/r/3352ee288fe34f2b44220457a29bfc0548686363.1620711734.git.leonro@nvidia.com Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2b9ffc21cbc4..ab148a696c0c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + id_priv->id.device = NULL; if (id_priv->id.route.addr.dev_addr.sgid_attr) { rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); id_priv->id.route.addr.dev_addr.sgid_attr = NULL; @@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, iw_destroy_cm_id(id_priv->cm_id.iw); } cma_leave_mc_groups(id_priv); + rdma_restrack_del(&id_priv->res); cma_release_dev(id_priv); } @@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); put_net(id_priv->id.route.addr.dev_addr.net); - rdma_restrack_del(&id_priv->res); kfree(id_priv); } @@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) } id_priv->backlog = backlog; - if (id->device) { + if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) -- cgit v1.2.3 From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 May 2021 13:52:27 +0300 Subject: {net, RDMA}/mlx5: Fix override of log_max_qp by other device mlx5_core_dev holds pointer to static profile, hence when the log_max_qp of the profile is override by some device, then it effect all other mlx5 devices that share the same profile. Fix it by having a profile instance for every mlx5 device. Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mr.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +++---- include/linux/mlx5/driver.h | 44 +++++++++++++------------- 3 files changed, 29 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4388afeff251..9662cd39c7ff 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -743,10 +743,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && mlx5_ib_can_load_pas_with_umr(dev, 0)) - ent->limit = dev->mdev->profile->mr_cache[i].limit; + ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; spin_lock_irq(&ent->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c114365eb126..a1d67bd7fb43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { - struct mlx5_profile *prof = dev->profile; + struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; int err; @@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", - profile[prof_sel].log_max_qp, + prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); - profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) struct mlx5_priv *priv = &dev->priv; int err; - dev->profile = &profile[profile_idx]; - + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->intf_state_mutex); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f8e8d7e90616..020a8f7fdbdd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -703,6 +703,27 @@ struct mlx5_hv_vhca; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, +}; + +enum { + MR_CACHE_LAST_STD_ENTRY = 20, + MLX5_IMR_MTT_CACHE_ENTRY, + MLX5_IMR_KSM_CACHE_ENTRY, + MAX_MR_CACHE_ENTRIES +}; + +struct mlx5_profile { + u64 mask; + u8 log_max_qp; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -731,7 +752,7 @@ struct mlx5_core_dev { struct mutex intf_state_mutex; unsigned long intf_state; struct mlx5_priv priv; - struct mlx5_profile *profile; + struct mlx5_profile profile; u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_dm *dm; @@ -1083,18 +1104,6 @@ static inline u8 mlx5_mkey_variant(u32 mkey) return mkey & 0xff; } -enum { - MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, -}; - -enum { - MR_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MR_CACHE_ENTRIES -}; - /* Async-atomic event notifier used by mlx5 core to forward FW * evetns recived from event queue to mlx5 consumers. * Optimise event queue dipatching. @@ -1148,15 +1157,6 @@ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); -struct mlx5_profile { - u64 mask; - u8 log_max_qp; - struct { - int size; - int limit; - } mr_cache[MAX_MR_CACHE_ENTRIES]; -}; - enum { MLX5_PCI_DEV_IS_VF = 1 << 0, }; -- cgit v1.2.3 From cfa3b797118eda7d68f9ede9b1a0279192aca653 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 19 May 2021 11:41:32 +0300 Subject: RDMA/mlx5: Fix query DCT via DEVX When executing DEVX command to query QP object, we need to take the QP type from the mlx5_ib_qp struct which hold the driver specific QP types as well, such as DC. Fixes: 34613eb1d2ad ("IB/mlx5: Enable modify and query verbs objects via DEVX") Link: https://lore.kernel.org/r/6eee15d63f09bb70787488e0cf96216e2957f5aa.1621413654.git.leonro@nvidia.com Reviewed-by: Yishai Hadas Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index a0b677accd96..eb9b0a2707f8 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -630,9 +630,8 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, case UVERBS_OBJECT_QP: { struct mlx5_ib_qp *qp = to_mqp(uobj->object); - enum ib_qp_type qp_type = qp->ibqp.qp_type; - if (qp_type == IB_QPT_RAW_PACKET || + if (qp->type == IB_QPT_RAW_PACKET || (qp->flags & IB_QP_CREATE_SOURCE_QPN)) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -649,10 +648,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, sq->tisn) == obj_id); } - if (qp_type == MLX5_IB_QPT_DCT) + if (qp->type == MLX5_IB_QPT_DCT) return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, qp->dct.mdct.mqp.qpn) == obj_id; - return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, qp->ibqp.qp_num) == obj_id; } -- cgit v1.2.3 From 463a3f66473b58d71428a1c3ce69ea52c05440e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 May 2021 17:18:10 +0300 Subject: RDMA/uverbs: Fix a NULL vs IS_ERR() bug The uapi_get_object() function returns error pointers, it never returns NULL. Fixes: 149d3845f4a5 ("RDMA/uverbs: Add a method to introspect handles in a context") Link: https://lore.kernel.org/r/YJ6Got+U7lz+3n9a@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index a03021d94e11..049684880ae0 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -117,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)( return ret; uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id); - if (!uapi_object) - return -EINVAL; + if (IS_ERR(uapi_object)) + return PTR_ERR(uapi_object); handles = gather_objects_handle(attrs->ufile, uapi_object, attrs, out_len, &total); -- cgit v1.2.3 From a3e74fb9247cd530dca246699d5eb5a691884d32 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 25 May 2021 18:01:34 +0300 Subject: RDMA/ipoib: Fix warning caused by destroying non-initial netns After the commit 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces"), if the IPoIB device is moved to non-initial netns, destroying that netns lets the device vanish instead of moving it back to the initial netns, This is happening because default_device_exit() skips the interfaces due to having rtnl_link_ops set. Steps to reporoduce: ip netns add foo ip link set mlx5_ib0 netns foo ip netns delete foo WARNING: CPU: 1 PID: 704 at net/core/dev.c:11435 netdev_exit+0x3f/0x50 Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun d fuse CPU: 1 PID: 704 Comm: kworker/u64:3 Tainted: G S W 5.13.0-rc1+ #1 Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.5 04/11/2016 Workqueue: netns cleanup_net RIP: 0010:netdev_exit+0x3f/0x50 Code: 48 8b bb 30 01 00 00 e8 ef 81 b1 ff 48 81 fb c0 3a 54 a1 74 13 48 8b 83 90 00 00 00 48 81 c3 90 00 00 00 48 39 d8 75 02 5b c3 <0f> 0b 5b c3 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00 RSP: 0018:ffffb297079d7e08 EFLAGS: 00010206 RAX: ffff8eb542c00040 RBX: ffff8eb541333150 RCX: 000000008010000d RDX: 000000008010000e RSI: 000000008010000d RDI: ffff8eb440042c00 RBP: ffffb297079d7e48 R08: 0000000000000001 R09: ffffffff9fdeac00 R10: ffff8eb5003be000 R11: 0000000000000001 R12: ffffffffa1545620 R13: ffffffffa1545628 R14: 0000000000000000 R15: ffffffffa1543b20 FS: 0000000000000000(0000) GS:ffff8ed37fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005601b5f4c2e8 CR3: 0000001fc8c10002 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ops_exit_list.isra.9+0x36/0x70 cleanup_net+0x234/0x390 process_one_work+0x1cb/0x360 ? process_one_work+0x360/0x360 worker_thread+0x30/0x370 ? process_one_work+0x360/0x360 kthread+0x116/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x22/0x30 To avoid the above warning and later on the kernel panic that could happen on shutdown due to a NULL pointer dereference, make sure to set the netns_refund flag that was introduced by commit 3a5ca857079e ("can: dev: Move device back to init netns on owning netns delete") to properly restore the IPoIB interfaces to the initial netns. Fixes: 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces") Link: https://lore.kernel.org/r/20210525150134.139342-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index d5a90a66b45c..5b05cf3837da 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -163,6 +163,7 @@ static size_t ipoib_get_size(const struct net_device *dev) static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .kind = "ipoib", + .netns_refund = true, .maxtype = IFLA_IPOIB_MAX, .policy = ipoib_policy, .priv_size = sizeof(struct ipoib_dev_priv), -- cgit v1.2.3 From a0ffb4c12f7fa89163e228e6f27df09b46631db1 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 3 Jun 2021 16:18:03 +0300 Subject: RDMA/mlx5: Use different doorbell memory for different processes In a fork scenario, the parent and child can have same virtual address and also share the uverbs fd. That causes to the list_for_each_entry search return same doorbell physical page for all processes, even though that page has been COW' or copied. This patch takes the mm_struct into consideration during search, to make sure that VA's belonging to different processes are not intermixed. Resolves the malfunction of uverbs after fork in some specific cases. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/feacc23fe0bc6e1088c6824d5583798745e72405.1622726212.git.leonro@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/doorbell.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 61475b571531..7af4df7a6823 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -41,6 +41,7 @@ struct mlx5_ib_user_db_page { struct ib_umem *umem; unsigned long user_virt; int refcnt; + struct mm_struct *mm; }; int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, @@ -53,7 +54,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, mutex_lock(&context->db_page_mutex); list_for_each_entry(page, &context->db_page_list, list) - if (page->user_virt == (virt & PAGE_MASK)) + if ((current->mm == page->mm) && + (page->user_virt == (virt & PAGE_MASK))) goto found; page = kmalloc(sizeof(*page), GFP_KERNEL); @@ -71,6 +73,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, kfree(page); goto out; } + mmgrab(current->mm); + page->mm = current->mm; list_add(&page->list, &context->db_page_list); @@ -91,6 +95,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) if (!--db->u.user_page->refcnt) { list_del(&db->u.user_page->list); + mmdrop(db->u.user_page->mm); ib_umem_release(db->u.user_page->umem); kfree(db->u.user_page); } -- cgit v1.2.3 From 404e5a12691fe797486475fe28cc0b80cb8bef2c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 3 Jun 2021 16:19:39 +0300 Subject: RDMA/mlx4: Do not map the core_clock page to user space unless enabled Currently when mlx4 maps the hca_core_clock page to the user space there are read-modifiable registers, one of which is semaphore, on this page as well as the clock counter. If user reads the wrong offset, it can modify the semaphore and hang the device. Do not map the hca_core_clock page to the user space unless the device has been put in a backwards compatibility mode to support this feature. After this patch, mlx4 core_clock won't be mapped to user space on the majority of existing devices and the uverbs device time feature in ibv_query_rt_values_ex() will be disabled. Fixes: 52033cfb5aab ("IB/mlx4: Add mmap call to map the hardware clock") Link: https://lore.kernel.org/r/9632304e0d6790af84b3b706d8c18732bc0d5e27.1622726305.git.leonro@nvidia.com Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 5 +---- drivers/net/ethernet/mellanox/mlx4/fw.c | 3 +++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 6 ++++++ include/linux/mlx4/device.h | 1 + 5 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 22898d97ecbd..16704262fc3a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -581,12 +581,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT; props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD; - if (!mlx4_is_slave(dev->dev)) - err = mlx4_get_internal_clock_params(dev->dev, &clock_params); - if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { resp.response_length += sizeof(resp.hca_core_clock_offset); - if (!err && !mlx4_is_slave(dev->dev)) { + if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) { resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET; resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f6cfec81ccc3..dc4ac1a2b6b6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -823,6 +823,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac +#define QUERY_DEV_CAP_MAP_CLOCK_TO_USER 0xc1 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2 @@ -841,6 +842,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (mlx4_is_mfunc(dev)) disable_unsupported_roce_caps(outbox); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAP_CLOCK_TO_USER); + dev_cap->map_clock_to_user = field & 0x80; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); dev_cap->reserved_qps = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 8f020f26ebf5..cf64e54eecb0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -131,6 +131,7 @@ struct mlx4_dev_cap { u32 health_buffer_addrs; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; bool wol_port[MLX4_MAX_PORTS + 1]; + bool map_clock_to_user; }; struct mlx4_func_cap { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index c326b434734e..00c84656b2e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -498,6 +498,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } + dev->caps.map_clock_to_user = dev_cap->map_clock_to_user; dev->caps.uar_page_size = PAGE_SIZE; dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; @@ -1948,6 +1949,11 @@ int mlx4_get_internal_clock_params(struct mlx4_dev *dev, if (mlx4_is_slave(dev)) return -EOPNOTSUPP; + if (!dev->caps.map_clock_to_user) { + mlx4_dbg(dev, "Map clock to user is not supported.\n"); + return -EOPNOTSUPP; + } + if (!params) return -EINVAL; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 236a7d04f891..30bb59fe970c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -630,6 +630,7 @@ struct mlx4_caps { bool wol_port[MLX4_MAX_PORTS + 1]; struct mlx4_rate_limit_caps rl_caps; u32 health_buffer_addrs; + bool map_clock_to_user; }; struct mlx4_buf_list { -- cgit v1.2.3 From edc0b0bccc9c80d9a44d3002dcca94984b25e7cf Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 7 Jun 2021 11:03:12 +0300 Subject: RDMA/mlx5: Block FDB rules when not in switchdev mode Allow creating FDB steering rules only when in switchdev mode. The only software model where a userspace application can manipulate FDB entries is when it manages the eswitch. This is only possible in switchdev mode where we expose a single RDMA device with representors for all the vports that are connected to the eswitch. Fixes: 52438be44112 ("RDMA/mlx5: Allow inserting a steering rule to the FDB") Link: https://lore.kernel.org/r/e928ae7c58d07f104716a2a8d730963d1bd01204.1623052923.git.leonro@nvidia.com Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 2fc6a60c4e77..f84441ff0c81 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2134,6 +2134,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; + if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB && + mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) { + err = -EINVAL; + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); -- cgit v1.2.3 From 2adcb4c5a52a2623cd2b43efa7041e74d19f3a5e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Jun 2021 10:34:25 +0300 Subject: RDMA: Verify port when creating flow rule Validate port value provided by the user and with that remove no longer needed validation by the driver. The missing check in the mlx5_ib driver could cause to the below oops. Call trace: _create_flow_rule+0x2d4/0xf28 [mlx5_ib] mlx5_ib_create_flow+0x2d0/0x5b0 [mlx5_ib] ib_uverbs_ex_create_flow+0x4cc/0x624 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xd4/0x150 [ib_uverbs] ib_uverbs_cmd_verbs.isra.7+0xb28/0xc50 [ib_uverbs] ib_uverbs_ioctl+0x158/0x1d0 [ib_uverbs] do_vfs_ioctl+0xd0/0xaf0 ksys_ioctl+0x84/0xb4 __arm64_sys_ioctl+0x28/0xc4 el0_svc_common.constprop.3+0xa4/0x254 el0_svc_handler+0x84/0xa0 el0_svc+0x10/0x26c Code: b9401260 f9615681 51000400 8b001c20 (f9403c1a) Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") Link: https://lore.kernel.org/r/faad30dc5219a01727f47db3dc2f029d07c82c00.1623309971.git.leonro@nvidia.com Reviewed-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 5 +++++ drivers/infiniband/hw/mlx4/main.c | 3 --- drivers/infiniband/hw/mlx5/fs.c | 5 ++--- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d5e15a8c870d..64e4be1cbec7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3248,6 +3248,11 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) goto err_free_attr; } + if (!rdma_is_port_valid(uobj->context->device, cmd.flow_attr.port)) { + err = -EINVAL; + goto err_uobj; + } + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { err = -EINVAL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 16704262fc3a..230a6ae0ab5a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1699,9 +1699,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (!rdma_is_port_valid(qp->device, flow_attr->port)) - return ERR_PTR(-EINVAL); - if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index f84441ff0c81..18ee2f293825 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -1194,9 +1194,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto free_ucmd; } - if (flow_attr->port > dev->num_ports || - (flow_attr->flags & - ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) { err = -EINVAL; goto free_ucmd; } -- cgit v1.2.3 From 6466f03fdf98dd78b9453deb8a7cb0d887c09fec Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 10 Jun 2021 10:34:26 +0300 Subject: RDMA/mlx5: Delete right entry from MR signature database The value mr->sig is stored in the entry upon mr allocation, however, ibmr is wrongly entered here as "old", therefore, xa_cmpxchg() does not replace the entry with NULL, which leads to the following trace: WARNING: CPU: 28 PID: 2078 at drivers/infiniband/hw/mlx5/main.c:3643 mlx5_ib_stage_init_cleanup+0x4d/0x60 [mlx5_ib] Modules linked in: nvme_rdma nvme_fabrics nvme_core 8021q garp mrp bonding bridge stp llc rfkill rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_tad CPU: 28 PID: 2078 Comm: reboot Tainted: G X --------- --- 5.13.0-0.rc2.19.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.9.1 12/07/2018 RIP: 0010:mlx5_ib_stage_init_cleanup+0x4d/0x60 [mlx5_ib] Code: 8d bb 70 1f 00 00 be 00 01 00 00 e8 9d 94 ce da 48 3d 00 01 00 00 75 02 5b c3 0f 0b 5b c3 0f 0b 48 83 bb b0 20 00 00 00 74 d5 <0f> 0b eb d1 4 RSP: 0018:ffffa8db06d33c90 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff97f890a44000 RCX: ffff97f900ec0160 RDX: 0000000000000000 RSI: 0000000080080001 RDI: ffff97f890a44000 RBP: ffffffffc0c189b8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000300 R12: ffff97f890a44000 R13: ffffffffc0c36030 R14: 00000000fee1dead R15: 0000000000000000 FS: 00007f0d5a8a3b40(0000) GS:ffff98077fb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000555acbf4f450 CR3: 00000002a6f56002 CR4: 00000000001706e0 Call Trace: mlx5r_remove+0x39/0x60 [mlx5_ib] auxiliary_bus_remove+0x1b/0x30 __device_release_driver+0x17a/0x230 device_release_driver+0x24/0x30 bus_remove_device+0xdb/0x140 device_del+0x18b/0x3e0 mlx5_detach_device+0x59/0x90 [mlx5_core] mlx5_unload_one+0x22/0x60 [mlx5_core] shutdown+0x31/0x3a [mlx5_core] pci_device_shutdown+0x34/0x60 device_shutdown+0x15b/0x1c0 __do_sys_reboot.cold+0x2f/0x5b ? vfs_writev+0xc7/0x140 ? handle_mm_fault+0xc5/0x290 ? do_writev+0x6b/0x110 do_syscall_64+0x40/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: e6fb246ccafb ("RDMA/mlx5: Consolidate MR destruction to mlx5_ib_dereg_mr()") Link: https://lore.kernel.org/r/f3f585ea0db59c2a78f94f65eedeafc5a2374993.1623309971.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9662cd39c7ff..425423dfac72 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1940,8 +1940,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (ibmr->type == IB_MR_TYPE_INTEGRITY) { - xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), ibmr, - NULL, GFP_KERNEL); + xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), + mr->sig, NULL, GFP_KERNEL); if (mr->mtt_mr) { rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); -- cgit v1.2.3 From 2ba0aa2feebda680ecfc3c552e867cf4d1b05a3a Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Thu, 10 Jun 2021 10:34:27 +0300 Subject: IB/mlx5: Fix initializing CQ fragments buffer The function init_cq_frag_buf() can be called to initialize the current CQ fragments buffer cq->buf, or the temporary cq->resize_buf that is filled during CQ resize operation. However, the offending commit started to use function get_cqe() for getting the CQEs, the issue with this change is that get_cqe() always returns CQEs from cq->buf, which leads us to initialize the wrong buffer, and in case of enlarging the CQ we try to access elements beyond the size of the current cq->buf and eventually hit a kernel panic. [exception RIP: init_cq_frag_buf+103] [ffff9f799ddcbcd8] mlx5_ib_resize_cq at ffffffffc0835d60 [mlx5_ib] [ffff9f799ddcbdb0] ib_resize_cq at ffffffffc05270df [ib_core] [ffff9f799ddcbdc0] llt_rdma_setup_qp at ffffffffc0a6a712 [llt] [ffff9f799ddcbe10] llt_rdma_cc_event_action at ffffffffc0a6b411 [llt] [ffff9f799ddcbe98] llt_rdma_client_conn_thread at ffffffffc0a6bb75 [llt] [ffff9f799ddcbec8] kthread at ffffffffa66c5da1 [ffff9f799ddcbf50] ret_from_fork_nospec_begin at ffffffffa6d95ddd Fix it by getting the needed CQE by calling mlx5_frag_buf_get_wqe() that takes the correct source buffer as a parameter. Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)") Link: https://lore.kernel.org/r/90a0e8c924093cfa50a482880ad7e7edb73dc19a.1623309971.git.leonro@nvidia.com Signed-off-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index eb92cefffd77..9ce01f729673 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -849,15 +849,14 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) ib_umem_release(cq->buf.umem); } -static void init_cq_frag_buf(struct mlx5_ib_cq *cq, - struct mlx5_ib_cq_buf *buf) +static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf) { int i; void *cqe; struct mlx5_cqe64 *cqe64; for (i = 0; i < buf->nent; i++) { - cqe = get_cqe(cq, i); + cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i); cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; cqe64->op_own = MLX5_CQE_INVALID << 4; } @@ -883,7 +882,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (err) goto err_db; - init_cq_frag_buf(cq, &cq->buf); + init_cq_frag_buf(&cq->buf); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * @@ -1184,7 +1183,7 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (err) goto ex; - init_cq_frag_buf(cq, cq->resize_buf); + init_cq_frag_buf(cq->resize_buf); return 0; -- cgit v1.2.3