Merge master.kernel.org:/home/rmk/linux-2.6-arm
[powerpc.git] / net / ipv4 / tcp_output.c
index f458eac..caf2e2c 100644 (file)
@@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
        u32 restart_cwnd = tcp_init_cwnd(tp, dst);
        u32 cwnd = tp->snd_cwnd;
 
-       tcp_ca_event(tp, CA_EVENT_CWND_RESTART);
+       tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
 
-       tp->snd_ssthresh = tcp_current_ssthresh(tp);
+       tp->snd_ssthresh = tcp_current_ssthresh(sk);
        restart_cwnd = min(restart_cwnd, cwnd);
 
        while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
@@ -190,15 +190,16 @@ void tcp_select_initial_window(int __space, __u32 mss,
        }
 
        /* Set initial window to value enough for senders,
-        * following RFC1414. Senders, not following this RFC,
+        * following RFC2414. Senders, not following this RFC,
         * will be satisfied with 2.
         */
        if (mss > (1<<*rcv_wscale)) {
-               int init_cwnd = 4;
-               if (mss > 1460*3)
+               int init_cwnd;
+
+               if (mss > 1460)
                        init_cwnd = 2;
-               else if (mss > 1460)
-                       init_cwnd = 3;
+               else
+                       init_cwnd = (mss > 1095) ? 3 : 4;
                if (*rcv_wnd > init_cwnd*mss)
                        *rcv_wnd = init_cwnd*mss;
        }
@@ -265,6 +266,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 {
        if (skb != NULL) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
                struct inet_sock *inet = inet_sk(sk);
                struct tcp_sock *tp = tcp_sk(sk);
                struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -280,8 +282,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 #define SYSCTL_FLAG_SACK       0x4
 
                /* If congestion control is doing timestamping */
-               if (tp->ca_ops->rtt_sample)
-                       do_gettimeofday(&skb->stamp);
+               if (icsk->icsk_ca_ops->rtt_sample)
+                       __net_timestamp(skb);
 
                sysctl_flags = 0;
                if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -308,7 +310,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                }
                
                if (tcp_packets_in_flight(tp) == 0)
-                       tcp_ca_event(tp, CA_EVENT_TX_START);
+                       tcp_ca_event(sk, CA_EVENT_TX_START);
 
                th = (struct tcphdr *) skb_push(skb, tcp_header_size);
                skb->h.th = th;
@@ -366,7 +368,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                if (err <= 0)
                        return err;
 
-               tcp_enter_cwr(tp);
+               tcp_enter_cwr(sk);
 
                /* NET_XMIT_CN is special. It does not guarantee,
                 * that this packet is lost. It tells that device
@@ -427,13 +429,15 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
  * packet to the list.  This won't be called frequently, I hope. 
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *buff;
-       int nsize;
+       int nsize, old_factor;
        u16 flags;
 
+       BUG_ON(len >= skb->len);
+
        nsize = skb_headlen(skb) - len;
        if (nsize < 0)
                nsize = 0;
@@ -458,9 +462,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
        flags = TCP_SKB_CB(skb)->flags;
        TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
        TCP_SKB_CB(buff)->flags = flags;
-       TCP_SKB_CB(buff)->sacked =
-               (TCP_SKB_CB(skb)->sacked &
-                (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
+       TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
        TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
        if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -482,25 +484,46 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
         * skbs, which it never sent before. --ANK
         */
        TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-       buff->stamp = skb->stamp;
+       buff->tstamp = skb->tstamp;
 
-       if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-               tp->lost_out -= tcp_skb_pcount(skb);
-               tp->left_out -= tcp_skb_pcount(skb);
-       }
+       old_factor = tcp_skb_pcount(skb);
 
        /* Fix up tso_factor for both original and new SKB.  */
        tcp_set_skb_tso_segs(sk, skb, mss_now);
        tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-       if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-               tp->lost_out += tcp_skb_pcount(skb);
-               tp->left_out += tcp_skb_pcount(skb);
-       }
+       /* If this packet has been sent out already, we must
+        * adjust the various packet counters.
+        */
+       if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+               int diff = old_factor - tcp_skb_pcount(skb) -
+                       tcp_skb_pcount(buff);
+
+               tp->packets_out -= diff;
+
+               if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+                       tp->sacked_out -= diff;
+               if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+                       tp->retrans_out -= diff;
 
-       if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-               tp->lost_out += tcp_skb_pcount(buff);
-               tp->left_out += tcp_skb_pcount(buff);
+               if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+                       tp->lost_out -= diff;
+                       tp->left_out -= diff;
+               }
+
+               if (diff > 0) {
+                       /* Adjust Reno SACK estimate. */
+                       if (!tp->rx_opt.sack_ok) {
+                               tp->sacked_out -= diff;
+                               if ((int)tp->sacked_out < 0)
+                                       tp->sacked_out = 0;
+                               tcp_sync_left_out(tp);
+                       }
+
+                       tp->fackets_out -= diff;
+                       if ((int)tp->fackets_out < 0)
+                               tp->fackets_out = 0;
+               }
        }
 
        /* Link BUFF into the send queue. */
@@ -905,12 +928,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  */
 static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
 {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 send_win, cong_win, limit, in_flight;
 
        if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
                return 0;
 
-       if (tp->ca_state != TCP_CA_Open)
+       if (icsk->icsk_ca_state != TCP_CA_Open)
                return 0;
 
        in_flight = tcp_packets_in_flight(tp);
@@ -1287,6 +1311,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
  */ 
 void tcp_simple_retransmit(struct sock *sk)
 {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk, 0);
@@ -1317,12 +1342,12 @@ void tcp_simple_retransmit(struct sock *sk)
         * in network, but units changed and effective
         * cwnd/ssthresh really reduced now.
         */
-       if (tp->ca_state != TCP_CA_Loss) {
+       if (icsk->icsk_ca_state != TCP_CA_Loss) {
                tp->high_seq = tp->snd_nxt;
-               tp->snd_ssthresh = tcp_current_ssthresh(tp);
+               tp->snd_ssthresh = tcp_current_ssthresh(sk);
                tp->prior_ssthresh = 0;
                tp->undo_marker = 0;
-               tcp_set_ca_state(tp, TCP_CA_Loss);
+               tcp_set_ca_state(sk, TCP_CA_Loss);
        }
        tcp_xmit_retransmit_queue(sk);
 }
@@ -1347,12 +1372,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
        if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
                if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
                        BUG();
-
-               if (sk->sk_route_caps & NETIF_F_TSO) {
-                       sk->sk_route_caps &= ~NETIF_F_TSO;
-                       sock_set_flag(sk, SOCK_NO_LARGESEND);
-               }
-
                if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
                        return -ENOMEM;
        }
@@ -1367,22 +1386,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                return -EAGAIN;
 
        if (skb->len > cur_mss) {
-               int old_factor = tcp_skb_pcount(skb);
-               int diff;
-
                if (tcp_fragment(sk, skb, cur_mss, cur_mss))
                        return -ENOMEM; /* We'll try again later. */
-
-               /* New SKB created, account for it. */
-               diff = old_factor - tcp_skb_pcount(skb) -
-                      tcp_skb_pcount(skb->next);
-               tp->packets_out -= diff;
-
-               if (diff > 0) {
-                       tp->fackets_out -= diff;
-                       if ((int)tp->fackets_out < 0)
-                               tp->fackets_out = 0;
-               }
        }
 
        /* Collapse two adjacent packets if worthwhile and we can. */
@@ -1462,6 +1467,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
  */
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        int packet_cnt = tp->lost_out;
@@ -1485,7 +1491,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                                if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
                                        if (tcp_retransmit_skb(sk, skb))
                                                return;
-                                       if (tp->ca_state != TCP_CA_Loss)
+                                       if (icsk->icsk_ca_state != TCP_CA_Loss)
                                                NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
                                        else
                                                NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
@@ -1507,7 +1513,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
        /* OK, demanded retransmission is finished. */
 
        /* Forward retransmissions are possible only during Recovery. */
-       if (tp->ca_state != TCP_CA_Recovery)
+       if (icsk->icsk_ca_state != TCP_CA_Recovery)
                return;
 
        /* No forward retransmissions in Reno are possible. */
@@ -1578,7 +1584,7 @@ void tcp_send_fin(struct sock *sk)
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
-                       skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+                       skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
                        if (skb)
                                break;
                        yield();
@@ -1800,7 +1806,7 @@ int tcp_connect(struct sock *sk)
 
        tcp_connect_init(sk);
 
-       buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+       buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
        if (unlikely(buff == NULL))
                return -ENOBUFS;
 
@@ -1989,12 +1995,6 @@ int tcp_write_wakeup(struct sock *sk)
                                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
                                if (tcp_fragment(sk, skb, seg_size, mss))
                                        return -1;
-                               /* SWS override triggered forced fragmentation.
-                                * Disable TSO, the connection is too sick. */
-                               if (sk->sk_route_caps & NETIF_F_TSO) {
-                                       sock_set_flag(sk, SOCK_NO_LARGESEND);
-                                       sk->sk_route_caps &= ~NETIF_F_TSO;
-                               }
                        } else if (!tcp_skb_pcount(skb))
                                tcp_set_skb_tso_segs(sk, skb, mss);
 
@@ -2028,7 +2028,7 @@ void tcp_send_probe0(struct sock *sk)
 
        if (tp->packets_out || !sk->sk_send_head) {
                /* Cancel probe timer, if it is not required. */
-               tp->probes_out = 0;
+               icsk->icsk_probes_out = 0;
                icsk->icsk_backoff = 0;
                return;
        }
@@ -2036,19 +2036,19 @@ void tcp_send_probe0(struct sock *sk)
        if (err <= 0) {
                if (icsk->icsk_backoff < sysctl_tcp_retries2)
                        icsk->icsk_backoff++;
-               tp->probes_out++;
+               icsk->icsk_probes_out++;
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
                                          min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
                                          TCP_RTO_MAX);
        } else {
                /* If packet was not sent due to local congestion,
-                * do not backoff and do not remember probes_out.
+                * do not backoff and do not remember icsk_probes_out.
                 * Let local senders to fight for local resources.
                 *
                 * Use accumulated backoff yet.
                 */
-               if (!tp->probes_out)
-                       tp->probes_out=1;
+               if (!icsk->icsk_probes_out)
+                       icsk->icsk_probes_out = 1;
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
                                          min(icsk->icsk_rto << icsk->icsk_backoff,
                                              TCP_RESOURCE_PROBE_INTERVAL),