2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * Implementation of the Transmission Control Protocol(TCP).
8 * Version: $Id: tcp_timer.c,v 1.87 2001/09/21 21:27:34 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * Corey Minyard <wf-rch!minyard@relay.EU.net>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
16 * Linus Torvalds, <torvalds@cs.helsinki.fi>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Matthew Dillon, <dillon@apollo.west.oic.com>
19 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
20 * Jorge Cwik, <jorge@laser.satlink.net>
/* Retry / keepalive limits exported as sysctls; the TCP_* macros
 * supply the compile-time defaults. */
25 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
26 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
27 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
28 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
29 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
30 int sysctl_tcp_retries1 = TCP_RETR1;
31 int sysctl_tcp_retries2 = TCP_RETR2;
/* Zero by default; a runtime value is then chosen in
 * tcp_orphan_retries() below. */
32 int sysctl_tcp_orphan_retries;
/* Handlers for the per-socket timers wired up in tcp_init_xmit_timers(). */
34 static void tcp_write_timer(unsigned long);
35 static void tcp_delack_timer(unsigned long);
36 static void tcp_keepalive_timer (unsigned long data);
/* Printed when tcp_write_timer() dispatches on an unknown tp->pending value. */
38 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
41 * Using different timers for retransmit, delayed acks and probes
42 * We may wish use just one timer maintaining a list of expire jiffies
/* Attach the three per-socket TCP timers: retransmission, delayed ACK,
 * and keepalive (the last one living in the generic sk->timer slot).
 * Every handler receives the socket pointer as its timer data. */
46 void tcp_init_xmit_timers(struct sock *sk)
48 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
/* Retransmission timer -> tcp_write_timer(). */
50 init_timer(&tp->retransmit_timer);
51 tp->retransmit_timer.function=&tcp_write_timer;
52 tp->retransmit_timer.data = (unsigned long) sk;
/* Delayed-ACK timer -> tcp_delack_timer(). */
55 init_timer(&tp->delack_timer);
56 tp->delack_timer.function=&tcp_delack_timer;
57 tp->delack_timer.data = (unsigned long) sk;
/* Keepalive (also used for listen-socket SYN-ACK processing and
 * FIN_WAIT2 timeout) -> tcp_keepalive_timer(). */
60 init_timer(&sk->timer);
61 sk->timer.function=&tcp_keepalive_timer;
62 sk->timer.data = (unsigned long) sk;
/* Cancel all pending TCP timers on the socket.  The body of each
 * successful-cancel branch is elided in this listing; presumably it
 * drops the socket reference held by the armed timer -- TODO confirm
 * against the full source. */
65 void tcp_clear_xmit_timers(struct sock *sk)
67 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
70 if (timer_pending(&tp->retransmit_timer) &&
71 del_timer(&tp->retransmit_timer))
76 if (timer_pending(&tp->delack_timer) &&
77 del_timer(&tp->delack_timer))
80 if(timer_pending(&sk->timer) && del_timer(&sk->timer))
/* Terminal timeout: report an error to the user and account the abort.
 * Follow-up teardown lines are elided in this listing. */
84 static void tcp_write_err(struct sock *sk)
/* A pending soft error (e.g. carried in by ICMP) takes precedence
 * over the generic ETIMEDOUT. */
86 sk->err = sk->err_soft ? : ETIMEDOUT;
90 NET_INC_STATS_BH(TCPAbortOnTimeout);
93 /* Do not allow orphaned sockets to eat all our resources.
94 * This is direct violation of TCP specs, but it is required
95 * to prevent DoS attacks. It is called when a retransmission timeout
96 * or zero probe timeout occurs on orphaned socket.
98 * Criterion is still not confirmed experimentally and may change.
99 * We kill the socket, if:
100 * 1. If number of orphaned sockets exceeds an administratively configured
102 * 2. If we have strong memory pressure.
104 static int tcp_out_of_resources(struct sock *sk, int do_reset)
106 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
107 int orphans = atomic_read(&tcp_orphan_count);
109 /* If peer does not open window for long time, or did not transmit
110 * anything for long time, penalize it. */
/* lsndtime is the last-send timestamp; compared as s32 to survive
 * jiffies wraparound. */
111 if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
114 /* If some dubious ICMP arrived, penalize even more. */
/* Kill the orphan when the global orphan count or TCP memory budget
 * (sysctl_tcp_mem[2] = hard limit) is exceeded. */
118 if (orphans >= sysctl_tcp_max_orphans ||
119 (sk->wmem_queued > SOCK_MIN_SNDBUF &&
120 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
122 printk(KERN_INFO "Out of socket memory\n");
124 /* Catch exceptional cases, when connection requires reset.
125 * 1. Last segment was sent recently. */
126 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
127 /* 2. Window is closed. */
128 (!tp->snd_wnd && !tp->packets_out))
/* Tell the peer we are going away before reclaiming the socket. */
131 tcp_send_active_reset(sk, GFP_ATOMIC);
133 NET_INC_STATS_BH(TCPAbortOnMemory);
139 /* Calculate maximal number of retries on an orphaned socket. */
140 static int tcp_orphan_retries(struct sock *sk, int alive)
142 int retries = sysctl_tcp_orphan_retries; /* May be zero. */
144 /* We know from an ICMP that something is wrong. */
/* !alive means the backed-off RTO already reached TCP_RTO_MAX;
 * combined with a soft error, give up early (elided branch). */
145 if (sk->err_soft && !alive)
148 /* However, if socket sent something recently, select some safe
149 * number of retries. 8 corresponds to >100 seconds with minimal
151 if (retries == 0 && alive)
156 /* A write timeout has occurred. Process the after effects. */
/* Decide whether to keep retransmitting or give up on the connection.
 * Returns nonzero when the caller must abort (error path elided in
 * this listing); zero to continue retrying. */
157 static int tcp_write_timeout(struct sock *sk)
159 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
/* Handshake states use the (per-socket or sysctl) SYN retry limit
 * and immediately give the route cache negative advice. */
162 if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
164 dst_negative_advice(&sk->dst_cache);
165 retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
/* Established connection: past retries1, PMTU black-hole recovery
 * would belong here (see the rant below); we only advise the route. */
167 if (tp->retransmits >= sysctl_tcp_retries1) {
168 /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
171 It is place to make it. It is not made. I do not want
172 to make it. It is disgusting. It does not work in any
173 case. Let me cite the same draft, which requires for
174 us to implement this:
176 "The one security concern raised by this memo is that ICMP black holes
177 are often caused by over-zealous security administrators who block
178 all ICMP messages. It is vitally important that those who design and
179 deploy security systems understand the impact of strict filtering on
180 upper-layer protocols. The safest web site in the world is worthless
181 if most TCP implementations cannot transfer data from it. It would
182 be far nicer to have all of the black holes fixed rather than fixing
183 all of the TCP implementations."
188 dst_negative_advice(&sk->dst_cache);
191 retry_until = sysctl_tcp_retries2;
/* Orphaned socket (elided condition): tighter limit, and possibly
 * reclaim it outright under resource pressure. */
193 int alive = (tp->rto < TCP_RTO_MAX);
195 retry_until = tcp_orphan_retries(sk, alive);
197 if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
202 if (tp->retransmits >= retry_until) {
203 /* Has it gone just too far? */
/* Delayed-ACK timer handler.  If the socket is busy in user context,
 * back off by TCP_DELACK_MIN and retry; otherwise drain the receive
 * prequeue and, if an ACK is still scheduled, account it and inflate
 * the ATO estimate since the delayed ACK evidently missed its window. */
210 static void tcp_delack_timer(unsigned long data)
212 struct sock *sk = (struct sock*)data;
213 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
216 if (sk->lock.users) {
217 /* Try again later. */
219 NET_INC_STATS_BH(DelayedACKLocked);
220 if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN))
/* Nothing to do if the socket is closed or no delayed-ACK timer
 * is actually armed. */
227 if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER))
/* Fired early relative to the recorded deadline: re-arm and exit. */
230 if ((long)(tp->ack.timeout - jiffies) > 0) {
231 if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
235 tp->ack.pending &= ~TCP_ACK_TIMER;
/* Push any segments parked on the prequeue through the normal
 * backlog receive path before deciding about the ACK. */
237 if (skb_queue_len(&tp->ucopy.prequeue)) {
240 net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue);
242 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
243 sk->backlog_rcv(sk, skb);
245 tp->ucopy.memory = 0;
248 if (tcp_ack_scheduled(tp)) {
249 if (!tp->ack.pingpong) {
250 /* Delayed ACK missed: inflate ATO. */
251 tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
253 /* Delayed ACK missed: leave pingpong mode and
256 tp->ack.pingpong = 0;
257 tp->ack.ato = TCP_ATO_MIN;
260 NET_INC_STATS_BH(DelayedACKs);
/* Under memory pressure, reclaim (elided call) before returning. */
265 if (tcp_memory_pressure)
/* Zero-window probe timer: decide whether to keep probing the peer's
 * closed window or give up on the connection. */
272 static void tcp_probe_timer(struct sock *sk)
274 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
/* Probing only makes sense when data is queued but nothing is in
 * flight; otherwise bail out (branch body elided). */
277 if (tp->packets_out || !tp->send_head) {
282 /* *WARNING* RFC 1122 forbids this
284 * It doesn't AFAIK, because we kill the retransmit timer -AK
286 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
287 * this behaviour in Solaris down as a bug fix. [AC]
289 * Let me explain. probes_out is zeroed by incoming ACKs
290 * even if they advertise zero window. Hence, connection is killed only
291 * if we received no ACKs for normal connection timeout. It is not killed
292 * only because window stays zero for some time, window may be zero
293 * until armageddon and even later. We are in full accordance
294 * with RFCs, only probe timer combines both retransmission timeout
295 * and probe timeout in one bottle. --ANK
297 max_probes = sysctl_tcp_retries2;
/* Orphaned socket (elided condition): use the tighter orphan limit
 * and possibly reclaim it under resource pressure. */
300 int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
302 max_probes = tcp_orphan_retries(sk, alive);
304 if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
/* Too many unanswered probes: abort (call elided). */
308 if (tp->probes_out > max_probes) {
311 /* Only send another probe if we didn't close things up. */
317 * The TCP retransmit timer.
/* Retransmission timeout handler: handles the pathological
 * window-shrink case, checks the give-up criteria, classifies the
 * failure for statistics, retransmits the head of the write queue,
 * and backs the RTO off exponentially. */
320 static void tcp_retransmit_timer(struct sock *sk)
322 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
/* Nothing in flight: spurious fire, just return (elided). */
324 if (tp->packets_out == 0)
327 BUG_TRAP(!skb_queue_empty(&sk->write_queue));
329 if (tp->snd_wnd == 0 && !sk->dead &&
330 !((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV))) {
331 /* Receiver dastardly shrinks window. Our retransmits
332 * become zero probes, but we should not timeout this
333 * connection. If the socket is an orphan, time it out,
334 * we cannot allow such beasts to hang infinitely.
338 printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
339 NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
340 tp->snd_una, tp->snd_nxt);
/* No ACK at all for longer than TCP_RTO_MAX: give up (elided). */
342 if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
346 tcp_enter_loss(sk, 0);
347 tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
349 goto out_reset_timer;
/* Nonzero return means the connection was killed; stop here. */
352 if (tcp_write_timeout(sk))
/* First timeout in this episode: classify the failure by the
 * congestion-avoidance state we were in, for SNMP statistics. */
355 if (tp->retransmits == 0) {
356 if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
358 if (tp->ca_state == TCP_CA_Recovery)
359 NET_INC_STATS_BH(TCPSackRecoveryFail);
361 NET_INC_STATS_BH(TCPSackFailures);
363 if (tp->ca_state == TCP_CA_Recovery)
364 NET_INC_STATS_BH(TCPRenoRecoveryFail);
366 NET_INC_STATS_BH(TCPRenoFailures);
368 } else if (tp->ca_state == TCP_CA_Loss) {
369 NET_INC_STATS_BH(TCPLossFailures);
371 NET_INC_STATS_BH(TCPTimeouts);
/* Per the helper name this is the F-RTO path; details elided --
 * TODO confirm against the full source. */
375 if (tcp_use_frto(sk)) {
378 tcp_enter_loss(sk, 0);
/* Positive return = local resource failure, not network loss. */
381 if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
382 /* Retransmission failed because of local congestion,
/* Retry soon without inflating the backoff counters. */
385 if (!tp->retransmits)
387 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
388 min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
392 /* Increase the timeout each time we retransmit. Note that
393 * we do not increase the rtt estimate. rto is initialized
394 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
395 * that doubling rto each time is the least we can get away with.
396 * In KA9Q, Karn uses this for the first few times, and then
397 * goes to quadratic. netBSD doubles, but only goes up to *64,
398 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
399 * defined in the protocol as the maximum possible RTT. I guess
400 * we'll have to use something other than TCP to talk to the
401 * University of Mars.
403 * PAWS allows us longer timeouts and large windows, so once
404 * implemented ftp to mars will work nicely. We will have to fix
405 * the 120 second clamps though!
/* Exponential backoff, capped at TCP_RTO_MAX. */
411 tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
412 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
/* Past retries1: route may be stale (follow-up call elided). */
413 if (tp->retransmits > sysctl_tcp_retries1)
/* Write-timer dispatcher: defers while the socket is user-locked,
 * re-arms itself if it fired before tp->timeout, then dispatches on
 * tp->pending (retransmission vs. zero-window probe).  The switch
 * head, the probe case body, and the default (timer_bug_msg) are
 * elided in this listing. */
419 static void tcp_write_timer(unsigned long data)
421 struct sock *sk = (struct sock*)data;
422 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
426 if (sk->lock.users) {
427 /* Try again later */
/* HZ/20 = 50ms backoff while user context holds the socket. */
428 if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20)))
433 if (sk->state == TCP_CLOSE || !tp->pending)
/* Fired early: re-arm for the recorded deadline. */
436 if ((long)(tp->timeout - jiffies) > 0) {
437 if (!mod_timer(&tp->retransmit_timer, tp->timeout))
446 case TCP_TIME_RETRANS:
447 tcp_retransmit_timer(sk);
449 case TCP_TIME_PROBE0:
463 * Timer for listening sockets
/* Walk a slice of the listen socket's SYN (open-request) hash table:
 * retransmit unanswered SYN-ACKs that are still under their retry
 * limit, prune the rest, then re-arm via the keepalive timer. */
466 static void tcp_synack_timer(struct sock *sk)
468 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
469 struct tcp_listen_opt *lopt = tp->listen_opt;
470 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
471 int thresh = max_retries;
472 unsigned long now = jiffies;
473 struct open_request **reqp, *req;
/* Empty queue: nothing to scan (and nothing to re-arm for). */
476 if (lopt == NULL || lopt->qlen == 0)
479 /* Normally all the openreqs are young and become mature
480 * (i.e. converted to established socket) for first timeout.
481 * If synack was not acknowledged for 3 seconds, it means
482 * one of the following things: synack was lost, ack was lost,
483 * rtt is high or nobody planned to ack (i.e. synflood).
484 * When server is a bit loaded, queue is populated with old
485 * open requests, reducing effective size of queue.
486 * When server is well loaded, queue size reduces to zero
487 * after several minutes of work. It is not synflood,
488 * it is normal operation. The solution is pruning
489 * too old entries overriding normal timeout, when
490 * situation becomes dangerous.
492 * Essentially, we reserve half of room for young
493 * embrions; and abort old ones without pity, if old
494 * ones are about to clog our table.
/* Queue more than half full: shrink 'thresh' (loop body elided)
 * so old entries are pruned more aggressively. */
496 if (lopt->qlen>>(lopt->max_qlen_log-1)) {
497 int young = (lopt->qlen_young<<1);
500 if (lopt->qlen < young)
/* TCP_DEFER_ACCEPT overrides the retry limit for acked requests. */
507 if (tp->defer_accept)
508 max_retries = tp->defer_accept;
/* Visit only a fraction of the table per tick so a full sweep
 * takes about one TCP_TIMEOUT_INIT. */
510 budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
511 i = lopt->clock_hand;
514 reqp=&lopt->syn_table[i];
515 while ((req = *reqp) != NULL) {
/* Entry expired: retransmit the SYN-ACK if still under its
 * limit, otherwise drop the request entirely. */
516 if ((long)(now - req->expires) >= 0) {
517 if ((req->retrans < thresh ||
518 (req->acked && req->retrans < max_retries))
519 && !req->class->rtx_syn_ack(sk, req, NULL)) {
522 if (req->retrans++ == 0)
/* Exponential backoff of the next expiry (cap elided). */
524 timeo = min((TCP_TIMEOUT_INIT << req->retrans),
526 req->expires = now + timeo;
527 reqp = &req->dl_next;
531 /* Drop this request */
532 write_lock(&tp->syn_wait_lock);
533 *reqp = req->dl_next;
534 write_unlock(&tp->syn_wait_lock);
536 if (req->retrans == 0)
538 tcp_openreq_free(req);
541 reqp = &req->dl_next;
/* Advance the clock hand around the hash table. */
544 i = (i+1)&(TCP_SYNQ_HSIZE-1);
546 } while (--budget > 0);
548 lopt->clock_hand = i;
551 tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
/* Cancel the keepalive timer if armed; the successful-cancel body is
 * elided here and presumably drops the timer's socket reference --
 * TODO confirm against the full source. */
554 void tcp_delete_keepalive_timer (struct sock *sk)
556 if (timer_pending(&sk->timer) && del_timer (&sk->timer))
/* (Re)arm the keepalive timer 'len' jiffies from now.  mod_timer()
 * returning 0 means the timer was not previously pending; the
 * follow-up for that case is elided in this listing. */
560 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
562 if (!mod_timer(&sk->timer, jiffies+len))
/* Enable or disable keepalive probing on an established socket. */
566 void tcp_set_keepalive(struct sock *sk, int val)
/* CLOSE and LISTEN sockets do not run keepalive (sk->timer is used
 * for SYN-ACK processing on listeners). */
568 if ((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))
/* Turning keepalive on: arm the timer for the idle interval. */
571 if (val && !sk->keepopen)
572 tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp));
/* The guarding condition for this branch is elided; presumably it
 * fires when keepalive is being turned off -- TODO confirm. */
574 tcp_delete_keepalive_timer(sk);
/* Shared sk->timer handler: SYN-ACK maintenance for LISTEN sockets,
 * FIN_WAIT2 timeout for dead sockets, and keepalive probing for
 * established connections. */
578 static void tcp_keepalive_timer (unsigned long data)
580 struct sock *sk = (struct sock *) data;
581 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
584 /* Only process if socket is not in use. */
586 if (sk->lock.users) {
587 /* Try again later. */
588 tcp_reset_keepalive_timer (sk, HZ/20);
/* Listening sockets: service the SYN queue instead of keepalive. */
592 if (sk->state == TCP_LISTEN) {
593 tcp_synack_timer(sk);
/* Orphaned FIN_WAIT2: either migrate to timewait handling or, with
 * linger2 < 0, reset the connection immediately. */
597 if (sk->state == TCP_FIN_WAIT2 && sk->dead) {
598 if (tp->linger2 >= 0) {
599 int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
602 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
606 tcp_send_active_reset(sk, GFP_ATOMIC);
/* Keepalive disabled or socket closed: nothing further to do. */
610 if (!sk->keepopen || sk->state == TCP_CLOSE)
613 elapsed = keepalive_time_when(tp);
615 /* It is alive without keepalive 8) */
616 if (tp->packets_out || tp->send_head)
/* Idle time since the last segment received from the peer. */
619 elapsed = tcp_time_stamp - tp->rcv_tstamp;
621 if (elapsed >= keepalive_time_when(tp)) {
/* Probe limit exhausted (per-socket override or sysctl): reset
 * and abort the connection (teardown elided). */
622 if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
623 (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
624 tcp_send_active_reset(sk, GFP_ATOMIC);
/* <= 0 from tcp_write_wakeup() means the probe went out (or was
 * unnecessary); schedule the next probe one keepalive interval out. */
628 if (tcp_write_wakeup(sk) <= 0) {
630 elapsed = keepalive_intvl_when(tp);
632 /* If keepalive was lost due to local congestion,
635 elapsed = TCP_RESOURCE_PROBE_INTERVAL;
638 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
639 elapsed = keepalive_time_when(tp) - elapsed;
646 tcp_reset_keepalive_timer (sk, elapsed);