From 0e442afd92fcdde2cc63b6f25556b8934e42b7d2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Sep 2009 11:10:15 -0700 Subject: IB/mad: Fix lock-lock-timer deadlock in RMPP code Holding agent->lock across cancel_delayed_work() (which does del_timer_sync()) in ib_cancel_rmpp_recvs() leads to lockdep reports of possible lock-timer deadlocks if a consumer ever does something that connects agent->lock to a lock taken in IRQ context (cf http://marc.info/?l=linux-rdma&m=125243699026045). Fix this by changing the list items to a new state "CANCELING" while holding the lock, and then canceling the delayed work without holding the lock. If the delayed work runs after the lock is dropped, it will see the state is CANCELING and return immediately, so the list will stay stable while we traverse it with the lock not held. Reviewed-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 57a3c6f947b2..4e0f2829e0e5 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -37,7 +37,8 @@ enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE + RMPP_STATE_COMPLETE, + RMPP_STATE_CANCELING }; struct mad_rmpp_recv { @@ -86,19 +87,23 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) unsigned long flags; spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + rmpp_recv->state = RMPP_STATE_CANCELING; + } + spin_unlock_irqrestore(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } - spin_unlock_irqrestore(&agent->lock, flags); flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); destroy_rmpp_recv(rmpp_recv); } } @@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state == RMPP_STATE_CANCELING) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); -- cgit v1.2.3 From bdf643816a2017eba9af280b6d29ef4213358984 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 24 Sep 2009 10:59:34 -0700 Subject: RDMA/nes: Remove duplicate .ndo_set_mac_address field initialization The definition of nes_netdev_ops has initializations of a local function and eth_mac_addr for its ndo_set_mac_address field. This change uses only the local function. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier I, s, fld; position p0,p; expression E; @@ struct I s =@p0 { ... .fld@p = E, ...}; @s@ identifier I, s, r.fld; position r.p0,p; expression E; @@ struct I s =@p0 { ... .fld@p = E, ...}; @script:python@ p0 << r.p0; fld << r.fld; ps << s.p; pr << r.p; @@ if int(ps[0].line)!=int(pr[0].line) or int(ps[0].column)!=int(pr[0].column): cocci.print_main(fld,p0) // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_nic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index c6e6611d3016..0bfa2546569f 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = { .ndo_set_mac_address = nes_netdev_set_mac_address, .ndo_set_multicast_list = nes_netdev_set_multicast_list, .ndo_change_mtu = nes_netdev_change_mtu, - .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_register = nes_netdev_vlan_rx_register, }; -- cgit v1.2.3 From d686159e50c57788001001e9537aa8b4bbc38001 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 24 Sep 2009 11:55:41 -0700 Subject: IB/mthca: Fix access to freed memory in catastrophic event handling catas_reset() uses a pointer to mthca_dev, but mthca_dev is not valid after the call to __mthca_restart_one(). Based on a similar patch for mlx4 (634354d7, "mlx4: Fix access to freed memory") by Vitaliy Gusev Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 056b2a4c6970..0aa0110e4b6c 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + struct pci_dev *pdev = dev->pdev; ret = __mthca_restart_one(dev->pdev); + /* 'dev' now is not valid */ if (ret) - mthca_err(dev, "Reset failed (%d)\n", ret); - else - mthca_dbg(dev, "Reset succeeded\n"); + printk(KERN_ERR "mthca %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else { + struct mthca_dev *d = pci_get_drvdata(pdev); + mthca_dbg(d, "Reset succeeded\n"); + } } mutex_unlock(&mthca_device_mutex); -- cgit v1.2.3