aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorBenjamin Poirier2023-07-19 13:57:22 +0000
committerJakub Kicinski2023-07-20 20:23:20 -0700
commit75f5f04c7bd2163044f80d5f7bea6b79edc288fa (patch)
tree5d6aeccd69259599d876042108a0dab3e771e9ad /net/ipv4
parent4bb5239b43348f4a1cd73f44be3dc57ac653ae9c (diff)
nexthop: Do not return invalid nexthop object during multipath selection
With legacy nexthops, when net.ipv4.fib_multipath_use_neigh is set, fib_select_multipath() will never set res->nhc to a nexthop that is not good (as per fib_good_nh()). OTOH, with nexthop objects, nexthop_select_path_hthr() may return a nexthop that failed the nexthop_is_good_nh() test even if there was one that passed. Refactor nexthop_select_path_hthr() to follow a selection logic more similar to fib_select_multipath(). The issue can be demonstrated with the following sequence of commands. The first block shows that things work as expected with legacy nexthops. The last sequence of `ip rou get` in the second block shows the problem case - some routes still use the .2 nexthop. sysctl net.ipv4.fib_multipath_use_neigh=1 ip link add dummy1 up type dummy ip rou add 198.51.100.0/24 nexthop via 192.0.2.1 dev dummy1 onlink nexthop via 192.0.2.2 dev dummy1 onlink for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip neigh add 192.0.2.1 dev dummy1 nud failed echo ".1 failed:" # results should not use .1 for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip neigh del 192.0.2.1 dev dummy1 ip neigh add 192.0.2.2 dev dummy1 nud failed echo ".2 failed:" # results should not use .2 for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip link del dummy1 ip link add dummy1 up type dummy ip nexthop add id 1 via 192.0.2.1 dev dummy1 onlink ip nexthop add id 2 via 192.0.2.2 dev dummy1 onlink ip nexthop add id 1001 group 1/2 ip rou add 198.51.100.0/24 nhid 1001 for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip neigh add 192.0.2.1 dev dummy1 nud failed echo ".1 failed:" # results should not use .1 for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip neigh del 192.0.2.1 dev dummy1 ip neigh add 192.0.2.2 dev dummy1 nud failed echo ".2 failed:" # results should not use .2 for i in {10..19}; do ip -o rou get 198.51.100.$i; done ip link del dummy1 Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://lore.kernel.org/r/20230719-nh_select-v2-3-04383e89f868@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/nexthop.c14
1 files changed, 8 insertions, 6 deletions
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index c12acbf39659..93f14d39fef6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1194,20 +1194,22 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- if (hash > atomic_read(&nhge->hthr.upper_bound))
- continue;
-
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- if (nexthop_is_good_nh(nhge->nh))
- return nhge->nh;
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)