tcp: Set pingpong threshold via sysctl
The TCP pingpong threshold is 1 by default. But some applications, like SQL DB, may prefer a higher pingpong threshold to activate delayed acks in quick ack mode for better performance. The pingpong threshold and related code were changed to 3 in the year 2019 in: commit 4a41f453be ("tcp: change pingpong threshold to 3") and reverted to 1 in the year 2022 in: commit 4d8f24eeed ("Revert "tcp: change pingpong threshold to 3""). There is no single value that fits all applications. Add the net.ipv4.tcp_pingpong_thresh sysctl tunable, so it can be tuned for optimal performance based on the application needs. Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> Reviewed-by: Simon Horman <horms@kernel.org> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://lore.kernel.org/r/1697056244-21888-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
committed by
Jakub Kicinski
parent
39d08b9164
commit
562b1fdf06
@@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER
|
||||
|
||||
Default: 128
|
||||
|
||||
tcp_pingpong_thresh - INTEGER
|
||||
The number of estimated data replies sent for estimated incoming data
|
||||
requests that must happen before TCP considers that a connection is a
|
||||
"ping-pong" (request-response) connection for which delayed
|
||||
acknowledgments can provide benefits.
|
||||
|
||||
This threshold is 1 by default, but some applications may need a higher
|
||||
threshold for optimal performance.
|
||||
|
||||
Possible Values: 1 - 255
|
||||
|
||||
Default: 1
|
||||
|
||||
UDP variables
|
||||
=============
|
||||
|
||||
|
||||
@@ -328,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
|
||||
|
||||
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
|
||||
|
||||
#define TCP_PINGPONG_THRESH 1
|
||||
|
||||
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
|
||||
{
|
||||
inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
|
||||
inet_csk(sk)->icsk_ack.pingpong =
|
||||
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
|
||||
}
|
||||
|
||||
static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
|
||||
@@ -342,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
|
||||
|
||||
static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
|
||||
{
|
||||
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
|
||||
return inet_csk(sk)->icsk_ack.pingpong >=
|
||||
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
|
||||
}
|
||||
|
||||
static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
if (icsk->icsk_ack.pingpong < U8_MAX)
|
||||
icsk->icsk_ack.pingpong++;
|
||||
}
|
||||
|
||||
static inline bool inet_csk_has_ulp(const struct sock *sk)
|
||||
|
||||
@@ -133,6 +133,8 @@ struct netns_ipv4 {
|
||||
u8 sysctl_tcp_migrate_req;
|
||||
u8 sysctl_tcp_comp_sack_nr;
|
||||
u8 sysctl_tcp_backlog_ack_defer;
|
||||
u8 sysctl_tcp_pingpong_thresh;
|
||||
|
||||
int sysctl_tcp_reordering;
|
||||
u8 sysctl_tcp_retries1;
|
||||
u8 sysctl_tcp_retries2;
|
||||
|
||||
@@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = {
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{
|
||||
.procname = "tcp_pingpong_thresh",
|
||||
.data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ONE,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
|
||||
@@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net)
|
||||
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
|
||||
net->ipv4.sysctl_tcp_shrink_window = 0;
|
||||
|
||||
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
|
||||
tp->lsndtime = now;
|
||||
|
||||
/* If it is a reply for ato after last received
|
||||
* packet, enter pingpong mode.
|
||||
* packet, increase pingpong count.
|
||||
*/
|
||||
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
|
||||
inet_csk_enter_pingpong_mode(sk);
|
||||
inet_csk_inc_pingpong_cnt(sk);
|
||||
}
|
||||
|
||||
/* Account for an ACK we sent. */
|
||||
|
||||
Reference in New Issue
Block a user