From 7355bfe0e0cc27597d530f78e259a985cb85af40 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 23 Jan 2022 13:57:17 +0100 Subject: netfilter: Remove flowtable relics NF_FLOW_TABLE_IPV4 and NF_FLOW_TABLE_IPV6 are invisble, selected by nothing (so they can no longer be enabled), and their last real users have been removed (nf_flow_table_ipv6.c is empty). Clean up the leftovers. Fixes: c42ba4290b2147aa ("netfilter: flowtable: remove ipv4/ipv6 modules") Signed-off-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 4 ---- net/ipv6/netfilter/Kconfig | 4 ---- net/ipv6/netfilter/Makefile | 3 --- net/ipv6/netfilter/nf_flow_table_ipv6.c | 0 4 files changed, 11 deletions(-) delete mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 67087f95579f..aab384126f61 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,10 +58,6 @@ config NF_TABLES_ARP endif # NF_TABLES -config NF_FLOW_TABLE_IPV4 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 97d3d1b36dbc..0ba62f4868f9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,10 +47,6 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES -config NF_FLOW_TABLE_IPV6 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b85383606df7..b8d6dc9aeeb6 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o -# flow table support -obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o - # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.3 From 34243b9ec856309339172b1507379074156947e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 23 Jan 2022 15:24:00 +0100 Subject: netfilter: nft_ct: fix use after free when attaching zone template The conversion erroneously removed the refcount increment. In case we can use the percpu template, we need to increment the refcount, else it will be released when the skb gets freed. In case the slowpath is taken, the new template already has a refcount of 1. Fixes: 719774377622 ("netfilter: conntrack: convert to refcount_t api") Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 518d96c8c247..5adf8bb628a8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); if (likely(refcount_read(&ct->ct_general.use) == 1)) { + refcount_inc(&ct->ct_general.use); nf_ct_zone_add(ct, &zone); } else { - /* previous skb got queued to userspace */ + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused. + */ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); if (!ct) { regs->verdict.code = NF_DROP; -- cgit v1.2.3 From c858620d2ae3489409af593f005a48a8a324da3d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 23 Jan 2022 15:45:54 +0100 Subject: selftests: netfilter: reduce zone stress test running time This selftests needs almost 3 minutes to complete, reduce the insertes zones to 1000. Test now completes in about 20 seconds. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_zones_many.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index 04633119b29a..5a8db0b48928 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -9,7 +9,7 @@ ns="ns-$sfx" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -zones=20000 +zones=2000 have_ct_tool=0 ret=0 @@ -75,10 +75,10 @@ EOF while [ $i -lt $max_zones ]; do local start=$(date +%s%3N) - i=$((i + 10000)) + i=$((i + 1000)) j=$((j + 1)) # nft rule in output places each packet in a different zone. - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 + dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break @@ -86,7 +86,7 @@ EOF stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" done if [ $have_ct_tool -eq 1 ]; then @@ -128,11 +128,11 @@ test_conntrack_tool() { break fi - if [ $((i%10000)) -eq 0 ];then + if [ $((i%1000)) -eq 0 ];then stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total)" + echo "PASS: added 1000 entries in $duration ms (now $i total)" start=$stop fi done -- cgit v1.2.3 From aad51ca71ad83273e8826d6cfdcf53c98748d1fa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jan 2022 22:09:15 +0100 Subject: selftests: netfilter: check stateless nat udp checksum fixup Add a test that sends large udp packet (which is fragmented) via a stateless nft nat rule, i.e. 'ip saddr set 10.2.3.4' and check that the datagram is received by peer. On kernels without commit 4e1860a38637 ("netfilter: nft_payload: do not update layer 4 checksum when mangling fragments")', this will fail with: cmp: EOF on /tmp/tmp.V1q0iXJyQF which is empty -rw------- 1 root root 4096 Jan 24 22:03 /tmp/tmp.Aaqnq4rBKS -rw------- 1 root root 0 Jan 24 22:03 /tmp/tmp.V1q0iXJyQF ERROR: in and output file mismatch when checking udp with stateless nat FAIL: nftables v1.0.0 (Fearless Fosdick #2) On patched kernels, this will show: PASS: IP statless for ns2-PFp89amx Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_nat.sh | 152 +++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 349a319a9e51..79fe627b9e81 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -899,6 +899,144 @@ EOF ip netns exec "$ns0" nft delete table $family nat } +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" + lret=1 + fi + + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" + lret=1 + fi + done + + reset_counters + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run stateless nat frag test without socat tool" + if [ $lret -eq 0 ]; then + return $ksft_skip + fi + + ip netns exec "$ns0" nft delete table ip stateless + return $lret + fi + + local tmpfile=$(mktemp) + dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null + + local outfile=$(mktemp) + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & + sc_r=$! + + sleep 1 + # re-do with large ping -> ip fragmentation + ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null + if [ $? -ne 0 ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 + lret=1 + fi + + wait + + cmp "$tmpfile" "$outfile" + if [ $? -ne 0 ]; then + ls -l "$tmpfile" "$outfile" + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 + lret=1 + fi + + rm -f "$tmpfile" "$outfile" + + # ns1 should have seen packets from 2.2, due to stateless rewrite. + expect="packets 3 bytes 4164" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" + lret=1 + fi + + ip netns exec "$ns0" nft delete table ip stateless + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete table ip stateless" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP statless for $ns2" + + return $lret +} + # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do ip netns exec ns$i-$sfx nft -f /dev/stdin <dev before passing nskb on to br_forward(). The shared skbuff creation helpers introduced in above commit do which seems to confuse br_forward() as reject statements in prerouting hook won't emit a packet anymore. Fix this by simply passing NULL instead of 'dev' to the helpers - they use the pointer for just that assignment, nothing else. Fixes: fa538f7cf05aa ("netfilter: nf_reject: add reject skbuff creation helpers") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index eba0efe64d05..fbf858ddec35 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; @@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; -- cgit v1.2.3 From f459bfd4b9793f25e0fcf19878edd87d8dc569d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Jan 2022 01:46:58 +0100 Subject: netfilter: nft_byteorder: track register operations Cancel tracking for byteorder operation, otherwise selector + byteorder operation is incorrectly reduced if source and destination registers are the same. Reported-by: kernel test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_byteorder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 9d5947ab8d4e..e646e9ee4a98 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -167,12 +167,24 @@ nla_put_failure: return -1; } +static bool nft_byteorder_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + + return false; +} + static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, + .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { -- cgit v1.2.3 From eda0cf1202acf1ef47f93d8f92d4839213431424 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jan 2022 12:54:54 +0100 Subject: selftests: nft_concat_range: add test for reload with no element add/del Add a specific test for the reload issue fixed with commit 23c54263efd7cb ("netfilter: nft_set_pipapo: allocate pcpu scratch maps on clone"). Add to set, then flush set content + restore without other add/remove in the transaction. On kernels before the fix, this test case fails: net,mac with reload [FAIL] Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_concat_range.sh | 72 +++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index ed61f6cab60f..df322e47a54f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -354,6 +354,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } -- cgit v1.2.3 From b07f413732549e5a96e891411fbb5980f2d8e5a1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Jan 2022 13:40:38 +0100 Subject: netfilter: nf_tables: remove assignment with no effect in chain blob builder cppcheck possible warnings: >> net/netfilter/nf_tables_api.c:2014:2: warning: Assignment of function parameter has no effect outside the function. Did you forget dereferencing it? [uselessAssignmentPtrArg] ptr += offsetof(struct nft_rule_dp, data); ^ Reported-by: kernel test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cf454f8ca2b0..5fa16990da95 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) prule = (struct nft_rule_dp *)ptr; prule->is_last = 1; - ptr += offsetof(struct nft_rule_dp, data); /* blob size does not include the trailer rule */ } -- cgit v1.2.3