tcp: defer SACK compression after DupThresh
authorEric Dumazet <edumazet@google.com>
Tue, 20 Nov 2018 13:53:59 +0000 (05:53 -0800)
committerDavid S. Miller <davem@davemloft.net>
Wed, 21 Nov 2018 23:49:52 +0000 (15:49 -0800)
Jean-Louis reported a TCP regression and bisected to recent SACK
compression.

After a loss episode (receiver not able to keep up and dropping
packets because its backlog is full), linux TCP stack is sending
a single SACK (DUPACK).

Sender waits a full RTO timer before recovering losses.

While RFC 6675 says in section 5, "Algorithm Details",

   (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true --
       indicating at least three segments have arrived above the current
       cumulative acknowledgment point, which is taken to indicate loss
       -- go to step (4).
...
   (4) Invoke fast retransmit and enter loss recovery as follows:

there are old TCP stacks not implementing this strategy, and
still counting the dupacks before starting fast retransmit.

While these stacks probably perform poorly when receivers implement
LRO/GRO, we should be a little more gentle to them.

This patch makes sure we do not enable SACK compression unless
3 dupacks have been sent since last rcv_nxt update.

Ideally we should even rearm the timer to send one or two
more DUPACK if no more packets are coming, but that will
be work aiming for linux-4.21.

Many thanks to Jean-Louis for bisecting the issue, providing
packet captures and testing this patch.

Fixes: 5d9f4262b7ea ("tcp: add SACK compression")
Reported-by: Jean-Louis Dupond <jean-louis@dupond.be>
Tested-by: Jean-Louis Dupond <jean-louis@dupond.be>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/tcp.h
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c

index 8ed77bb..a9b0280 100644 (file)
@@ -196,6 +196,7 @@ struct tcp_sock {
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
        u32     lsndtime;       /* timestamp of last sent data packet (for restart window) */
        u32     last_oow_ack_time;  /* timestamp of last out-of-window ACK */
+       u32     compressed_ack_rcv_nxt;
 
        u32     tsoffset;       /* timestamp offset */
 
index e695584..1e37c13 100644 (file)
@@ -4268,7 +4268,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
         * If the sack array is full, forget about the last one.
         */
        if (this_sack >= TCP_NUM_SACKS) {
-               if (tp->compressed_ack)
+               if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
                        tcp_send_ack(sk);
                this_sack--;
                tp->rx_opt.num_sacks--;
@@ -5189,7 +5189,17 @@ send_now:
        if (!tcp_is_sack(tp) ||
            tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
                goto send_now;
-       tp->compressed_ack++;
+
+       if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+               tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+               if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+                       NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+                                     tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+               tp->compressed_ack = 0;
+       }
+
+       if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+               goto send_now;
 
        if (hrtimer_is_queued(&tp->compressed_ack_timer))
                return;
index 9c34b97..3f510ca 100644 (file)
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (unlikely(tp->compressed_ack)) {
+       if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
                NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-                             tp->compressed_ack);
-               tp->compressed_ack = 0;
+                             tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+               tp->compressed_ack = TCP_FASTRETRANS_THRESH;
                if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
                        __sock_put(sk);
        }
index 6760206..5f8b6d3 100644 (file)
@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 
        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
-               if (tp->compressed_ack)
+               if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
                        tcp_send_ack(sk);
        } else {
                if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,