From a158bdd3247b9656df36ba133235fff702e9fdc3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:41 +0000 Subject: rxrpc: Fix call timeouts Fix the rxrpc call expiration timeouts and make them settable from userspace. By analogy with other rx implementations, there should be three timeouts: (1) "Normal timeout" This is set for all calls and is triggered if we haven't received any packets from the peer in a while. It is measured from the last time we received any packet on that call. This is not reset by any connection packets (such as CHALLENGE/RESPONSE packets). If a service operation takes a long time, the server should generate PING ACKs at a duration that's substantially less than the normal timeout so is to keep both sides alive. This is set at 1/6 of normal timeout. (2) "Idle timeout" This is set only for a service call and is triggered if we stop receiving the DATA packets that comprise the request data. It is measured from the last time we received a DATA packet. (3) "Hard timeout" This can be set for a call and specified the maximum lifetime of that call. It should not be specified by default. Some operations (such as volume transfer) take a long time. Allow userspace to set/change the timeouts on a call with sendmsg, using a control message: RXRPC_SET_CALL_TIMEOUTS The data to the message is a number of 32-bit words, not all of which need be given: u32 hard_timeout; /* sec from first packet */ u32 idle_timeout; /* msec from packet Rx */ u32 normal_timeout; /* msec from data Rx */ This can be set in combination with any other sendmsg() that affects a call. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 69 ++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 22 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ebe96796027a..01dcbc2164b5 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -138,10 +138,20 @@ enum rxrpc_rtt_rx_trace { enum rxrpc_timer_trace { rxrpc_timer_begin, + rxrpc_timer_exp_ack, + rxrpc_timer_exp_hard, + rxrpc_timer_exp_idle, + rxrpc_timer_exp_normal, + rxrpc_timer_exp_ping, + rxrpc_timer_exp_resend, rxrpc_timer_expired, rxrpc_timer_init_for_reply, rxrpc_timer_init_for_send_reply, + rxrpc_timer_restart, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_hard, + rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, @@ -296,12 +306,22 @@ enum rxrpc_congest_change { #define rxrpc_timer_traces \ EM(rxrpc_timer_begin, "Begin ") \ EM(rxrpc_timer_expired, "*EXPR*") \ + EM(rxrpc_timer_exp_ack, "ExpAck") \ + EM(rxrpc_timer_exp_hard, "ExpHrd") \ + EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_normal, "ExpNml") \ + EM(rxrpc_timer_exp_ping, "ExpPng") \ + EM(rxrpc_timer_exp_resend, "ExpRsn") \ EM(rxrpc_timer_init_for_reply, "IniRpl") \ EM(rxrpc_timer_init_for_send_reply, "SndRpl") \ + EM(rxrpc_timer_restart, "Restrt") \ EM(rxrpc_timer_set_for_ack, "SetAck") \ + EM(rxrpc_timer_set_for_hard, "SetHrd") \ + EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ - E_(rxrpc_timer_set_for_send, "SetTx ") + E_(rxrpc_timer_set_for_send, "SetSnd") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -932,39 +952,44 @@ TRACE_EVENT(rxrpc_rtt_rx, TRACE_EVENT(rxrpc_timer, TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now, unsigned long now_j), + unsigned long now), - TP_ARGS(call, why, now, now_j), + TP_ARGS(call, why, now), TP_STRUCT__entry( __field(struct rxrpc_call *, call ) __field(enum rxrpc_timer_trace, why ) - __field_struct(ktime_t, now ) - __field_struct(ktime_t, expire_at ) - __field_struct(ktime_t, ack_at ) - __field_struct(ktime_t, resend_at ) - __field(unsigned long, now_j ) - __field(unsigned long, timer ) + __field(long, now ) + __field(long, ack_at ) + __field(long, resend_at ) + __field(long, ping_at ) + __field(long, expect_rx_by ) + __field(long, expect_req_by ) + __field(long, expect_term_by ) + __field(long, timer ) ), TP_fast_assign( - __entry->call = call; - __entry->why = why; - __entry->now = now; - __entry->expire_at = call->expire_at; - __entry->ack_at = call->ack_at; - __entry->resend_at = call->resend_at; - __entry->now_j = now_j; - __entry->timer = call->timer.expires; + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->expect_rx_by = call->expect_rx_by; + __entry->expect_req_by = call->expect_req_by; + __entry->expect_term_by = call->expect_term_by; + __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", + TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), - ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), - ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), - __entry->timer - __entry->now_j) + __entry->ack_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->expect_rx_by - __entry->now, + __entry->expect_req_by - __entry->now, + __entry->expect_term_by - __entry->now, + __entry->timer - __entry->now) ); TRACE_EVENT(rxrpc_rx_lose, -- cgit v1.2.3 From bd1fdf8cfdf3fdbccd2b21c33ec649ebd7429af7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Add a timeout for detecting lost ACKs/lost DATA Add an extra timeout that is set/updated when we send a DATA packet that has the request-ack flag set. This allows us to detect if we don't get an ACK in response to the latest flagged packet. The ACK packet is adjudged to have been lost if it doesn't turn up within 2*RTT of the transmission. If the timeout occurs, we schedule the sending of a PING ACK to find out the state of the other side. If a new DATA packet is ready to go sooner, we cancel the sending of the ping and set the request-ack flag on that instead. If we get back a PING-RESPONSE ACK that indicates a lower tx_top than what we had at the time of the ping transmission, we adjudge all the DATA packets sent between the response tx_top and the ping-time tx_top to have been lost and retransmit immediately. Rather than sending a PING ACK, we could just pick a DATA packet and speculatively retransmit that with request-ack set. It should result in either a REQUESTED ACK or a DUPLICATE ACK which we can then use in lieu the a PING-RESPONSE ACK mentioned above. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 11 +++++++++-- net/rxrpc/ar-internal.h | 6 +++++- net/rxrpc/call_event.c | 26 ++++++++++++++++++++++---- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/rxrpc/output.c | 20 ++++++++++++++++++-- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 2 +- 8 files changed, 98 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 01dcbc2164b5..84ade8b76a19 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, rxrpc_timer_exp_resend, @@ -151,6 +152,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, @@ -309,6 +311,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ EM(rxrpc_timer_exp_resend, "ExpRsn") \ @@ -318,6 +321,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ EM(rxrpc_timer_set_for_resend, "SetRTx") \ @@ -961,6 +965,7 @@ TRACE_EVENT(rxrpc_timer, __field(enum rxrpc_timer_trace, why ) __field(long, now ) __field(long, ack_at ) + __field(long, ack_lost_at ) __field(long, resend_at ) __field(long, ping_at ) __field(long, expect_rx_by ) @@ -974,6 +979,7 @@ TRACE_EVENT(rxrpc_timer, __entry->why = why; __entry->now = now; __entry->ack_at = call->ack_at; + __entry->ack_lost_at = call->ack_lost_at; __entry->resend_at = call->resend_at; __entry->expect_rx_by = call->expect_rx_by; __entry->expect_req_by = call->expect_req_by; @@ -981,10 +987,11 @@ TRACE_EVENT(rxrpc_timer, __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s a=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%p %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), __entry->ack_at - __entry->now, + __entry->ack_lost_at - __entry->now, __entry->resend_at - __entry->now, __entry->expect_rx_by - __entry->now, __entry->expect_req_by - __entry->now, @@ -1105,7 +1112,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%p r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 548411371048..7e7b817c69f0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -471,6 +471,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ + RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ }; /* @@ -515,6 +516,7 @@ struct rxrpc_call { struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ @@ -624,6 +626,8 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ + rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ + rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ }; /* @@ -1011,7 +1015,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *, bool); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index da91f16ac77c..c65666b2f39e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -245,7 +245,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto out; } @@ -310,6 +310,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); + rxrpc_serial_t *send_ack; unsigned long now, next, t; rxrpc_see_call(call); @@ -358,6 +359,13 @@ recheck_state: set_bit(RXRPC_CALL_EV_ACK, &call->events); } + t = READ_ONCE(call->ack_lost_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); + cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -379,15 +387,24 @@ recheck_state: goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + send_ack = NULL; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { + call->acks_lost_top = call->tx_top; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + send_ack = &call->acks_lost_ping; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + send_ack) { if (call->ackr_reason) { - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, send_ack); goto recheck_state; } } if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto recheck_state; } @@ -404,6 +421,7 @@ recheck_state: set(call->expect_req_by); set(call->expect_term_by); set(call->ack_at); + set(call->ack_lost_at); set(call->resend_at); set(call->ping_at); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index b305970a9b63..7ee3d6ce5aa2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -197,6 +197,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) unsigned long j = now + MAX_JIFFY_OFFSET; call->ack_at = j; + call->ack_lost_at = j; call->resend_at = j; call->ping_at = j; call->expect_rx_by = j; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c89647eae86d..23a5e61d8f79 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -630,6 +630,43 @@ found: orig_serial, ack_serial, sent_at, resp_time); } +/* + * Process the response to a ping that we sent to find out if we lost an ACK. + * + * If we got back a ping response that indicates a lower tx_top than what we + * had at the time of the ping transmission, we adjudge all the DATA packets + * sent between the response tx_top and the ping-time tx_top to have been lost. + */ +static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) +{ + rxrpc_seq_t top, bottom, seq; + bool resend = false; + + spin_lock_bh(&call->lock); + + bottom = call->tx_hard_ack + 1; + top = call->acks_lost_top; + if (before(bottom, top)) { + for (seq = bottom; before_eq(seq, top); seq++) { + int ix = seq & RXRPC_RXTX_BUFF_MASK; + u8 annotation = call->rxtx_annotations[ix]; + u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; + + if (anno_type != RXRPC_TX_ANNO_UNACK) + continue; + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = annotation; + resend = true; + } + } + + spin_unlock_bh(&call->lock); + + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + /* * Process a ping response. */ @@ -645,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, smp_rmb(); ping_serial = call->ping_serial; + if (orig_serial == call->acks_lost_ping) + rxrpc_input_check_for_lost_ack(call); + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) return; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f47659c7b224..efe06edce189 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -95,7 +95,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, /* * Send an ACK call packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -165,6 +166,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); + if (_serial) + *_serial = serial; if (ping) { call->ping_serial = serial; @@ -323,7 +326,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && - (retrans || + (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || + retrans || call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), @@ -370,6 +374,18 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + if (call->peer->rtt_usage > 1) { + unsigned long nowj = jiffies, ack_lost_at; + + ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); + if (ack_lost_at < 1) + ack_lost_at = 1; + + ack_lost_at += nowj; + WRITE_ONCE(call->ack_lost_at, ack_lost_at); + rxrpc_reduce_call_timer(call, ack_lost_at, nowj, + rxrpc_timer_set_for_lost_ack); + } } } _leave(" = %d [%u]", ret, call->peer->maxdata); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index fad5f42a3abd..cc21e8db25b0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -148,7 +148,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } #endif @@ -222,7 +222,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, true, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index c56ee54fdd1f..a1c53ac066a1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -285,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); if (!skb) { size_t size, chunk, max, space; -- cgit v1.2.3 From 415f44e43282a16ec0808c7ccfd401762e587437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Add keepalive for a call We need to transmit a packet every so often to act as a keepalive for the peer (which has a timeout from the last time it received a packet) and also to prevent any intervening firewalls from closing the route. Do this by resetting a timer every time we transmit a packet. If the timer ever expires, we transmit a PING ACK packet and thereby also elicit a PING RESPONSE ACK from the other side - which prevents our last-rx timeout from expiring. The timer is set to 1/6 of the last-rx timeout so that we can detect the other side going away if it misses 6 replies in a row. This is particularly necessary for servers where the processing of the service function may take a significant amount of time. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 ++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 10 ++++++++++ net/rxrpc/output.c | 23 +++++++++++++++++++++++ 4 files changed, 40 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 84ade8b76a19..e98fed6de497 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -141,6 +141,7 @@ enum rxrpc_timer_trace { rxrpc_timer_exp_ack, rxrpc_timer_exp_hard, rxrpc_timer_exp_idle, + rxrpc_timer_exp_keepalive, rxrpc_timer_exp_lost_ack, rxrpc_timer_exp_normal, rxrpc_timer_exp_ping, @@ -152,6 +153,7 @@ enum rxrpc_timer_trace { rxrpc_timer_set_for_ack, rxrpc_timer_set_for_hard, rxrpc_timer_set_for_idle, + rxrpc_timer_set_for_keepalive, rxrpc_timer_set_for_lost_ack, rxrpc_timer_set_for_normal, rxrpc_timer_set_for_ping, @@ -162,6 +164,7 @@ enum rxrpc_timer_trace { enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_keepalive, rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, @@ -311,6 +314,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ + EM(rxrpc_timer_exp_keepalive, "ExpKA ") \ EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ EM(rxrpc_timer_exp_normal, "ExpNml") \ EM(rxrpc_timer_exp_ping, "ExpPng") \ @@ -321,6 +325,7 @@ enum rxrpc_congest_change { EM(rxrpc_timer_set_for_ack, "SetAck") \ EM(rxrpc_timer_set_for_hard, "SetHrd") \ EM(rxrpc_timer_set_for_idle, "SetIdl") \ + EM(rxrpc_timer_set_for_keepalive, "KeepAl") \ EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ EM(rxrpc_timer_set_for_normal, "SetNml") \ EM(rxrpc_timer_set_for_ping, "SetPng") \ @@ -330,6 +335,7 @@ enum rxrpc_congest_change { #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ + EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7e7b817c69f0..cdcbc798f921 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -519,6 +519,7 @@ struct rxrpc_call { unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ unsigned long ping_at; /* When next to send a ping */ + unsigned long keepalive_at; /* When next to send a keepalive ping */ unsigned long expect_rx_by; /* When we expect to get a packet by */ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ unsigned long expect_term_by; /* When we expect call termination by */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c65666b2f39e..bda952ffe6a6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -366,6 +366,15 @@ recheck_state: set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); } + t = READ_ONCE(call->keepalive_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); + cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ack_ping_for_keepalive); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + t = READ_ONCE(call->ping_at); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); @@ -423,6 +432,7 @@ recheck_state: set(call->ack_at); set(call->ack_lost_at); set(call->resend_at); + set(call->keepalive_at); set(call->ping_at); now = jiffies; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index efe06edce189..42410e910aff 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -32,6 +32,24 @@ struct rxrpc_abort_buffer { __be32 abort_code; }; +/* + * Arrange for a keepalive ping a certain time after we last transmitted. This + * lets the far side know we're still interested in this call and helps keep + * the route through any intervening firewall open. + * + * Receiving a response to the ping will prevent the ->expect_rx_by timer from + * expiring. + */ +static void rxrpc_set_keepalive(struct rxrpc_call *call) +{ + unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + + keepalive_at += now; + WRITE_ONCE(call->keepalive_at, keepalive_at); + rxrpc_reduce_call_timer(call, keepalive_at, now, + rxrpc_timer_set_for_keepalive); +} + /* * Fill out an ACK packet. */ @@ -205,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, call->ackr_seen = top; spin_unlock_bh(&call->lock); } + + rxrpc_set_keepalive(call); } out: @@ -388,6 +408,9 @@ done: } } } + + rxrpc_set_keepalive(call); + _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; -- cgit v1.2.3 From f859ab61875978eeaa539740ff7f7d91f5d60006 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: rxrpc: Fix service endpoint expiry RxRPC service endpoints expire like they're supposed to by the following means: (1) Mark dead rxrpc_net structs (with ->live) rather than twiddling the global service conn timeout, otherwise the first rxrpc_net struct to die will cause connections on all others to expire immediately from then on. (2) Mark local service endpoints for which the socket has been closed (->service_closed) so that the expiration timeout can be much shortened for service and client connections going through that endpoint. (3) rxrpc_put_service_conn() needs to schedule the reaper when the usage count reaches 1, not 0, as idle conns have a 1 count. (4) The accumulator for the earliest time we might want to schedule for should be initialised to jiffies + MAX_JIFFY_OFFSET, not ULONG_MAX as the comparison functions use signed arithmetic. (5) Simplify the expiration handling, adding the expiration value to the idle timestamp each time rather than keeping track of the time in the past before which the idle timestamp must go to be expired. This is much easier to read. (6) Ignore the timeouts if the net namespace is dead. (7) Restart the service reaper work item rather the client reaper. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 13 +++++++++++++ net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/conn_object.c | 42 ++++++++++++++++++++++++------------------ net/rxrpc/net_ns.c | 3 +++ 6 files changed, 47 insertions(+), 18 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e98fed6de497..36cb50c111a6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -49,6 +49,7 @@ enum rxrpc_conn_trace { rxrpc_conn_put_client, rxrpc_conn_put_service, rxrpc_conn_queued, + rxrpc_conn_reap_service, rxrpc_conn_seen, }; @@ -221,6 +222,7 @@ enum rxrpc_congest_change { EM(rxrpc_conn_put_client, "PTc") \ EM(rxrpc_conn_put_service, "PTs") \ EM(rxrpc_conn_queued, "QUE") \ + EM(rxrpc_conn_reap_service, "RPs") \ E_(rxrpc_conn_seen, "SEE") #define rxrpc_client_traces \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c0cdcf980ffc..abb524c2b8f8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -867,6 +867,19 @@ static int rxrpc_release_sock(struct sock *sk) sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cdcbc798f921..a0082c407005 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -84,6 +84,7 @@ struct rxrpc_net { unsigned int nr_client_conns; unsigned int nr_active_client_conns; bool kill_all_client_conns; + bool live; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head waiting_client_conns; @@ -265,6 +266,7 @@ struct rxrpc_local { rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; + bool service_closed; /* Service socket closed */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -881,6 +883,7 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 97f6a8de4845..785dfdb9fef1 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1079,6 +1079,8 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index a01a3791f06a..726eda6140ae 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -20,7 +20,8 @@ /* * Time till a connection expires after last use (in seconds). */ -unsigned int rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_destroy_connection(struct rcu_head *); @@ -321,7 +322,7 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) { + if (n == 1) { rxnet = conn->params.local->rxnet; rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); } @@ -363,15 +364,14 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(to_delayed_work(work), struct rxrpc_net, service_conn_reaper); - unsigned long reap_older_than, earliest, idle_timestamp, now; + unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); now = jiffies; - reap_older_than = now - rxrpc_connection_expiry * HZ; - earliest = ULONG_MAX; + earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { @@ -381,15 +381,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - idle_timestamp = READ_ONCE(conn->idle_timestamp); - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), - (long)reap_older_than - (long)idle_timestamp); - - if (time_after(idle_timestamp, reap_older_than)) { - if (time_before(idle_timestamp, earliest)) - earliest = idle_timestamp; - continue; + if (rxnet->live) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } } /* The usage count sits at 1 whilst the object is unused on the @@ -397,6 +403,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; + trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); if (rxrpc_conn_is_client(conn)) BUG(); @@ -407,10 +414,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } write_unlock(&rxnet->conn_lock); - if (earliest != ULONG_MAX) { - _debug("reschedule reaper %ld", (long) earliest - now); + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, earliest - now); } @@ -439,7 +446,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - rxrpc_connection_expiry = 0; cancel_delayed_work(&rxnet->client_conn_reaper); rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5..684c51d600c7 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -22,6 +22,7 @@ static __net_init int rxrpc_init_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); int ret; + rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; @@ -60,6 +61,7 @@ static __net_init int rxrpc_init_net(struct net *net) return 0; err_proc: + rxnet->live = false; return ret; } @@ -70,6 +72,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); + rxnet->live = false; rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_locals(rxnet); -- cgit v1.2.3