From 9f36169912331fa035d7b73a91252d7c2512eb1a Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 Jun 2024 18:07:52 +0200 Subject: cipso: fix total option length computation As evident from the definition of ip_options_get(), the IP option IPOPT_END is used to pad the IP option data array, not IPOPT_NOP. Yet the loop that walks the IP options to determine the total IP options length in cipso_v4_delopt() doesn't take IPOPT_END into account. Fix it by recognizing the IPOPT_END value as the end of actual options. Fixes: 014ab19a69c3 ("selinux: Set socket NetLabel based on connection endpoint") Signed-off-by: Ondrej Mosnacek Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..5e9ac68444f8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2013,12 +2013,16 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) * from there we can determine the new total option length */ iter = 0; optlen_new = 0; - while (iter < opt->opt.optlen) - if (opt->opt.__data[iter] != IPOPT_NOP) { + while (iter < opt->opt.optlen) { + if (opt->opt.__data[iter] == IPOPT_END) { + break; + } else if (opt->opt.__data[iter] == IPOPT_NOP) { + iter++; + } else { iter += opt->opt.__data[iter + 1]; optlen_new = iter; - } else - iter++; + } + } hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; -- cgit v1.2.3 From 89aa3619d141d6cfb6040a561aebb6d99d3e2285 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 Jun 2024 18:07:53 +0200 Subject: cipso: make cipso_v4_skbuff_delattr() fully remove the CIPSO options As the comment in this function says, the code currently just clears the CIPSO part with IPOPT_NOP, rather than removing it completely and trimming the packet. The other cipso_v4_*_delattr() functions, however, do the proper removal and also calipso_skbuff_delattr() makes an effort to remove the CALIPSO options instead of replacing them with padding. Some routers treat IPv4 packets with anything (even NOPs) in the option header as a special case and take them through a slower processing path. Consequently, hardening guides such as STIG recommend to configure such routers to drop packets with non-empty IP option headers [1][2]. Thus, users might expect NetLabel to produce packets with minimal padding (or at least with no padding when no actual options are present). Implement the proper option removal to address this and to be closer to what the peer functions do. [1] https://www.stigviewer.com/stig/juniper_router_rtr/2019-09-27/finding/V-90937 [2] https://www.stigviewer.com/stig/cisco_ios_xe_router_rtr/2021-03-26/finding/V-217001 Signed-off-by: Ondrej Mosnacek Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 79 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 25 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5e9ac68444f8..e9cb27061c12 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1810,6 +1810,29 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, return CIPSO_V4_HDR_LEN + ret_val; } +static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len) +{ + int iter = 0, optlen = 0; + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length + */ + while (iter < len) { + if (data[iter] == IPOPT_END) { + break; + } else if (data[iter] == IPOPT_NOP) { + iter++; + } else { + iter += data[iter + 1]; + optlen = iter; + } + } + return optlen; +} + /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket @@ -1986,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; - int iter; int optlen_new; cipso_off = opt->opt.cipso - sizeof(struct iphdr); @@ -2006,23 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) memmove(cipso_ptr, cipso_ptr + cipso_len, opt->opt.optlen - cipso_off - cipso_len); - /* determining the new total option length is tricky because of - * the padding necessary, the only thing i can think to do at - * this point is walk the options one-by-one, skipping the - * padding at the end to determine the actual option size and - * from there we can determine the new total option length */ - iter = 0; - optlen_new = 0; - while (iter < opt->opt.optlen) { - if (opt->opt.__data[iter] == IPOPT_END) { - break; - } else if (opt->opt.__data[iter] == IPOPT_NOP) { - iter++; - } else { - iter += opt->opt.__data[iter + 1]; - optlen_new = iter; - } - } + optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data, + opt->opt.optlen); hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; @@ -2242,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, */ int cipso_v4_skbuff_delattr(struct sk_buff *skb) { - int ret_val; + int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len, + hdr_len_delta; struct iphdr *iph; struct ip_options *opt = &IPCB(skb)->opt; unsigned char *cipso_ptr; @@ -2255,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) if (ret_val < 0) return ret_val; - /* the easiest thing to do is just replace the cipso option with noop - * options since we don't change the size of the packet, although we - * still need to recalculate the checksum */ - iph = ip_hdr(skb); cipso_ptr = (unsigned char *)iph + opt->cipso; - memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + cipso_len = cipso_ptr[1]; + + hdr_len_actual = sizeof(struct iphdr) + + cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1), + opt->optlen); + new_hdr_len_actual = hdr_len_actual - cipso_len; + new_hdr_len = (new_hdr_len_actual + 3) & ~3; + hdr_len_delta = (iph->ihl << 2) - new_hdr_len; + + /* 1. shift any options after CIPSO to the left */ + memmove(cipso_ptr, cipso_ptr + cipso_len, + new_hdr_len_actual - opt->cipso); + /* 2. move the whole IP header to its new place */ + memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual); + /* 3. adjust the skb layout */ + skb_pull(skb, hdr_len_delta); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + /* 4. re-fill new padding with IPOPT_END (may now be longer) */ + memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END, + new_hdr_len - new_hdr_len_actual); + + opt->optlen -= hdr_len_delta; opt->cipso = 0; opt->is_changed = 1; - + if (hdr_len_delta != 0) { + iph->ihl = new_hdr_len >> 2; + iph_set_totlen(iph, skb->len); + } ip_send_check(iph); return 0; -- cgit v1.2.3 From 9e046bb111f13461d3f9331e24e974324245140e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Jun 2024 13:06:15 +0000 Subject: tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack() Some applications were reporting ETIMEDOUT errors on apparently good looking flows, according to packet dumps. We were able to root cause the issue to an accidental setting of tp->retrans_stamp in the following scenario: - client sends TFO SYN with data. - server has TFO disabled, ACKs only SYN but not payload. - client receives SYNACK covering only SYN. - tcp_ack() eats SYN and sets tp->retrans_stamp to 0. - tcp_rcv_fastopen_synack() calls tcp_xmit_retransmit_queue() to retransmit TFO payload w/o SYN, sets tp->retrans_stamp to "now", but we are not in any loss recovery state. - TFO payload is ACKed. - we are not in any loss recovery state, and don't see any dupacks, so we don't get to any code path that clears tp->retrans_stamp. - tp->retrans_stamp stays non-zero for the lifetime of the connection. - after first RTO, tcp_clamp_rto_to_user_timeout() clamps second RTO to 1 jiffy due to bogus tp->retrans_stamp. - on clamped RTO with non-zero icsk_retransmits, retransmits_timed_out() sets start_ts from tp->retrans_stamp from TFO payload retransmit hours/days ago, and computes bogus long elapsed time for loss recovery, and suffers ETIMEDOUT early. Fixes: a7abf3cd76e1 ("tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()") CC: stable@vger.kernel.org Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Co-developed-by: Yuchung Cheng Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240614130615.396837-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..01d208e0eef3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6296,6 +6296,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, skb_rbtree_walk_from(data) tcp_mark_skb_lost(sk, data); tcp_xmit_retransmit_queue(sk); + tp->retrans_stamp = 0; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; -- cgit v1.2.3 From f9ae848904289ddb16c7c9e4553ed4c64300de49 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 19 Jun 2024 01:29:04 +0100 Subject: net/tcp_ao: Don't leak ao_info on error-path It seems I introduced it together with TCP_AO_CMDF_AO_REQUIRED, on version 5 [1] of TCP-AO patches. Quite frustrative that having all these selftests that I've written, running kmemtest & kcov was always in todo. [1]: https://lore.kernel.org/netdev/20230215183335.800122-5-dima@arista.com/ Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240617072451.1403e1d2@kernel.org/ Fixes: 0aadc73995d0 ("net/tcp: Prevent TCP-MD5 with TCP-AO being set") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240619-tcp-ao-required-leak-v1-1-6408f3c94247@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ao.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..09c0fa6756b7 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1968,8 +1968,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, first = true; } - if (cmd.ao_required && tcp_ao_required_verify(sk)) - return -EKEYREJECTED; + if (cmd.ao_required && tcp_ao_required_verify(sk)) { + err = -EKEYREJECTED; + goto out; + } /* For sockets in TCP_CLOSED it's possible set keys that aren't * matching the future peer (address/port/VRF/etc), -- cgit v1.2.3