From 7f502361531e9eecb396cf99bdc9e9a59f7ebd7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2014 01:26:23 -0700 Subject: ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix We have two different ways to handle changes to sk->sk_dst First way (used by TCP) assumes socket lock is owned by caller, and use no extra lock : __sk_dst_set() & __sk_dst_reset() Another way (used by UDP) uses sk_dst_lock because socket lock is not always taken. Note that sk_dst_lock is not softirq safe. These ways are not inter changeable for a given socket type. ipv4_sk_update_pmtu(), added in linux-3.8, added a race, as it used the socket lock as synchronization, but users might be UDP sockets. Instead of converting sk_dst_lock to a softirq safe version, use xchg() as we did for sk_rx_dst in commit e47eb5dfb296b ("udp: ipv4: do not use sk_dst_lock from softirq context") In a follow up patch, we probably can remove sk_dst_lock, as it is only used in IPv6. Signed-off-by: Eric Dumazet Cc: Steffen Klassert Fixes: 9cb3a50c5f63e ("ipv4: Invalidate the socket cached route on pmtu events if possible") Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 173cae485de1..c556fd9b05ac 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1768,9 +1768,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_set(sk, dst); - spin_unlock(&sk->sk_dst_lock); + struct dst_entry *old_dst; + + sk_tx_queue_clear(sk); + old_dst = xchg(&sk->sk_dst_cache, dst); + dst_release(old_dst); } static inline void @@ -1782,9 +1784,7 @@ __sk_dst_reset(struct sock *sk) static inline void sk_dst_reset(struct sock *sk) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_reset(sk); - spin_unlock(&sk->sk_dst_lock); + sk_dst_set(sk, NULL); } struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); -- cgit v1.2.3 From d9daa24720891a88bedb93928f57767da96e5c80 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 28 Jun 2014 01:23:35 +0200 Subject: net: fix circular dependency in of_mdio code Commit 86f6cf4127 (net: of_mdio: add of_mdiobus_link_phydev()) introduced a circular dependency between libphy and of_mdio. depmod: ERROR: /kernel/drivers/net/phy/libphy.ko in dependency cycle! depmod: ERROR: /kernel/drivers/of/of_mdio.ko in dependency cycle! The problem is that of_mdio.c references &mdio_bus_type and libphy now references of_mdiobus_link_phydev. Fix this by not exporting of_mdiobus_link_phydev() from of_mdio.ko. Make it a static function in mdio_bus.c instead. Signed-off-by: Daniel Mack Reported-by: Jeff Mahoney Fixes: 86f6cf4127 (net: of_mdio: add of_mdiobus_link_phydev()) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ drivers/of/of_mdio.c | 34 ---------------------------------- include/linux/of_mdio.h | 8 -------- 3 files changed, 44 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2e58aa54484c..4eaadcfcb0fe 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -187,6 +187,50 @@ struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np) return d ? to_mii_bus(d) : NULL; } EXPORT_SYMBOL(of_mdio_find_bus); + +/* Walk the list of subnodes of a mdio bus and look for a node that matches the + * phy's address with its 'reg' property. If found, set the of_node pointer for + * the phy. This allows auto-probed pyh devices to be supplied with information + * passed in via DT. + */ +static void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ + struct device *dev = &phydev->dev; + struct device_node *child; + + if (dev->of_node || !mdio->dev.of_node) + return; + + for_each_available_child_of_node(mdio->dev.of_node, child) { + int addr; + int ret; + + ret = of_property_read_u32(child, "reg", &addr); + if (ret < 0) { + dev_err(dev, "%s has invalid PHY address\n", + child->full_name); + continue; + } + + /* A PHY must have a reg property in the range [0-31] */ + if (addr >= PHY_MAX_ADDR) { + dev_err(dev, "%s PHY address %i is too large\n", + child->full_name, addr); + continue; + } + + if (addr == phydev->addr) { + dev->of_node = child; + return; + } + } +} +#else /* !IS_ENABLED(CONFIG_OF_MDIO) */ +static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ +} #endif /** diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index a3bf2122a8d5..401b2453da45 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -182,40 +182,6 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } EXPORT_SYMBOL(of_mdiobus_register); -/** - * of_mdiobus_link_phydev - Find a device node for a phy - * @mdio: pointer to mii_bus structure - * @phydev: phydev for which the of_node pointer should be set - * - * Walk the list of subnodes of a mdio bus and look for a node that matches the - * phy's address with its 'reg' property. If found, set the of_node pointer for - * the phy. This allows auto-probed pyh devices to be supplied with information - * passed in via DT. - */ -void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev) -{ - struct device *dev = &phydev->dev; - struct device_node *child; - - if (dev->of_node || !mdio->dev.of_node) - return; - - for_each_available_child_of_node(mdio->dev.of_node, child) { - int addr; - - addr = of_mdio_parse_addr(&mdio->dev, child); - if (addr < 0) - continue; - - if (addr == phydev->addr) { - dev->of_node = child; - return; - } - } -} -EXPORT_SYMBOL(of_mdiobus_link_phydev); - /* Helper function for of_phy_find_device */ static int of_phy_match(struct device *dev, void *phy_np) { diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index a70c9493d55a..d449018d0726 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,9 +25,6 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); -extern void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev); - #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { @@ -63,11 +60,6 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } - -static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev) -{ -} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.2.3 From 5925a0555bdaf0b396a84318cbc21ba085f6c0d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jul 2014 02:39:38 -0700 Subject: net: fix sparse warning in sk_dst_set() sk_dst_cache has __rcu annotation, so we need a cast to avoid following sparse error : include/net/sock.h:1774:19: warning: incorrect type in initializer (different address spaces) include/net/sock.h:1774:19: expected struct dst_entry [noderef] *__ret include/net/sock.h:1774:19: got struct dst_entry *dst Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Fixes: 7f502361531e ("ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix") Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c556fd9b05ac..156350745700 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1771,7 +1771,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - old_dst = xchg(&sk->sk_dst_cache, dst); + old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } -- cgit v1.2.3 From 35f6f45368632f21bd27559c44dbb1cab51d8947 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 29 Jun 2014 11:54:55 +0300 Subject: net/mlx4_en: Don't use irq_affinity_notifier to track changes in IRQ affinity map IRQ affinity notifier can only have a single notifier - cpu_rmap notifier. Can't use it to track changes in IRQ affinity map. Detect IRQ affinity changes by comparing CPU to current IRQ affinity map during NAPI poll thread. CC: Thomas Gleixner CC: Ben Hutchings Fixes: 2eacc23 ("net/mlx4_core: Enforce irq affinity changes immediatly") Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 2 - drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 16 +++++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 --- drivers/net/ethernet/mellanox/mlx4/eq.c | 69 ++++------------------------ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 4 +- 7 files changed, 28 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 80f725228f5b..56022d647837 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -294,8 +294,6 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, init_completion(&cq->free); cq->irq = priv->eq_table.eq[cq->vector].irq; - cq->irq_affinity_change = false; - return 0; err_radix: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 4b2130760eed..1213cc71348c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -128,6 +128,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", name); } + + cq->irq_desc = + irq_to_desc(mlx4_eq_get_irq(mdev->dev, + cq->vector)); } } else { cq->vector = (cq->ring + 1 + priv->port) % diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d2d415732d99..96724170308a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "mlx4_en.h" @@ -896,16 +897,25 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { + int cpu_curr; + const struct cpumask *aff; + INC_PERF_COUNTER(priv->pstats.napi_quota); - if (unlikely(cq->mcq.irq_affinity_change)) { - cq->mcq.irq_affinity_change = false; + + cpu_curr = smp_processor_id(); + aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; + + if (unlikely(!cpumask_test_cpu(cpu_curr, aff))) { + /* Current cpu is not according to smp_irq_affinity - + * probably affinity changed. need to stop this NAPI + * poll, and restart it on the right CPU + */ napi_complete(napi); mlx4_en_arm_cq(priv, cq); return 0; } } else { /* Done for now */ - cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8be7483f8236..ac3dead3792c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -474,15 +474,9 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done < budget) { /* Done for now */ - cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); return done; - } else if (unlikely(cq->mcq.irq_affinity_change)) { - cq->mcq.irq_affinity_change = false; - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); - return 0; } return budget; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index d954ec1eac17..2a004b347e1d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -53,11 +53,6 @@ enum { MLX4_EQ_ENTRY_SIZE = 0x20 }; -struct mlx4_irq_notify { - void *arg; - struct irq_affinity_notify notify; -}; - #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) @@ -1088,57 +1083,6 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev) iounmap(priv->clr_base); } -static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct mlx4_irq_notify *n = container_of(notify, - struct mlx4_irq_notify, - notify); - struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; - struct radix_tree_iter iter; - void **slot; - - radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { - struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); - - if (cq->irq == notify->irq) - cq->irq_affinity_change = true; - } -} - -static void mlx4_release_irq_notifier(struct kref *ref) -{ - struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, - notify.kref); - kfree(n); -} - -static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, - struct mlx4_dev *dev, int irq) -{ - struct mlx4_irq_notify *irq_notifier = NULL; - int err = 0; - - irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); - if (!irq_notifier) { - mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", - irq); - return; - } - - irq_notifier->notify.irq = irq; - irq_notifier->notify.notify = mlx4_irq_notifier_notify; - irq_notifier->notify.release = mlx4_release_irq_notifier; - irq_notifier->arg = priv; - err = irq_set_affinity_notifier(irq, &irq_notifier->notify); - if (err) { - kfree(irq_notifier); - irq_notifier = NULL; - mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq); - } -} - - int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1409,8 +1353,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, continue; /*we dont want to break here*/ } - mlx4_assign_irq_notifier(priv, dev, - priv->eq_table.eq[vec].irq); eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1427,6 +1369,14 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } EXPORT_SYMBOL(mlx4_assign_eq); +int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->eq_table.eq[vec].irq; +} +EXPORT_SYMBOL(mlx4_eq_get_irq); + void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1438,9 +1388,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { - irq_set_affinity_notifier( - priv->eq_table.eq[vec].irq, - NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 0e15295bedd6..624e1939e9ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -343,6 +343,7 @@ struct mlx4_en_cq { #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ #endif /* CONFIG_NET_RX_BUSY_POLL */ + struct irq_desc *irq_desc; }; struct mlx4_en_port_profile { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b12f4bbd064c..35b51e7af886 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -578,8 +578,6 @@ struct mlx4_cq { u32 cons_index; u16 irq; - bool irq_affinity_change; - __be32 *set_ci_db; __be32 *arm_db; int arm_sn; @@ -1167,6 +1165,8 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); + int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -- cgit v1.2.3 From 48bc03433cfdcac7a3bbb233d5c9f95297a0f5ab Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 29 Jun 2014 13:10:18 +0200 Subject: ieee802154: reassembly: fix possible buffer overflow The max_dsize attribute in ctl_table for lowpan_frags_ns_ctl_table is configured with integer accessing methods. This patch change the max_dsize attribute to int to avoid a possible buffer overflow. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/netns/ieee802154_6lowpan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 079030c853d8..e2070960bac0 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan { struct netns_ieee802154_lowpan { struct netns_sysctl_lowpan sysctl; struct netns_frags frags; - u16 max_dsize; + int max_dsize; }; #endif -- cgit v1.2.3 From 9ecf07a1d8f70f72ec99a0f102c8aa24609d84f4 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 12 Jul 2014 22:36:44 +0200 Subject: neigh: sysctl - simplify address calculation of gc_* variables The code in neigh_sysctl_register() relies on a specific layout of struct neigh_table, namely that the 'gc_*' variables are directly following the 'parms' member in a specific order. The code, though, expresses this in the most ugly way. Get rid of the ugly casts and use the 'tbl' pointer to get a handle to the table. This way we can refer to the 'gc_*' variables directly. Similarly seen in the grsecurity patch, written by Brad Spengler. Signed-off-by: Mathias Krause Cc: Brad Spengler Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 - net/core/neighbour.c | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7277caf3743d..47f425464f84 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -203,7 +203,6 @@ struct neigh_table { void (*proxy_redo)(struct sk_buff *skb); char *id; struct neigh_parms parms; - /* HACK. gc_* should follow parms without a gap! */ int gc_interval; int gc_thresh1; int gc_thresh2; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 32d872eec7f5..559890b0f0a2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { + struct neigh_table *tbl = p->tbl; dev_name_source = "default"; - t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); - t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; - t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; - t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { -- cgit v1.2.3