+static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+{
+ u32 window, cwnd_len;
+
+ window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+ cwnd_len = mss_now * cwnd;
+ return min(window, cwnd_len);
+}
+
+/* Can at least one segment of SKB be sent right now, according to the
+ * congestion window rules? If so, return how many segments are allowed.
+ */
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ u32 in_flight, cwnd;
+
+ /* Don't be strict about the congestion window for the final FIN. */
+ if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+ return 1;
+
+ in_flight = tcp_packets_in_flight(tp);
+ cwnd = tp->snd_cwnd;
+ if (in_flight < cwnd)
+ return (cwnd - in_flight);
+
+ return 0;
+}
+
+/* This must be invoked the first time we consider transmitting
+ * SKB onto the wire.
+ */
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+{
+ int tso_segs = tcp_skb_pcount(skb);
+
+ if (!tso_segs ||
+ (tso_segs > 1 &&
+ skb_shinfo(skb)->tso_size != mss_now)) {
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tso_segs = tcp_skb_pcount(skb);
+ }
+ return tso_segs;
+}
+
+static inline int tcp_minshall_check(const struct tcp_sock *tp)
+{
+ return after(tp->snd_sml,tp->snd_una) &&
+ !after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Return 0, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized.
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_NODELAY was set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ * With Minshall's modification: all sent small packets are ACKed.
+ */
+
+static inline int tcp_nagle_check(const struct tcp_sock *tp,
+ const struct sk_buff *skb,
+ unsigned mss_now, int nonagle)
+{
+ return (skb->len < mss_now &&
+ ((nonagle&TCP_NAGLE_CORK) ||
+ (!nonagle &&
+ tp->packets_out &&
+ tcp_minshall_check(tp))));
+}
+
+/* Return non-zero if the Nagle test allows this packet to be
+ * sent now.
+ */
+static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+ unsigned int cur_mss, int nonagle)
+{
+ /* Nagle rule does not apply to frames, which sit in the middle of the
+ * write_queue (they have no chances to get new data).
+ *
+ * This is implemented in the callers, where they modify the 'nonagle'
+ * argument based upon the location of SKB in the send queue.
+ */
+ if (nonagle & TCP_NAGLE_PUSH)
+ return 1;
+
+ /* Don't use the nagle rule for urgent data (or for the final FIN). */
+ if (tp->urg_mode ||
+ (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+ return 1;
+
+ if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+ return 1;
+
+ return 0;
+}
+
+/* Does at least the first segment of SKB fit into the send window? */
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+{
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (skb->len > cur_mss)
+ end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
+
+ return !after(end_seq, tp->snd_una + tp->snd_wnd);
+}
+
+/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+ * should be put on the wire right now. If so, it returns the number of
+ * packets allowed by the congestion window.
+ */
+static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+ unsigned int cur_mss, int nonagle)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int cwnd_quota;
+
+ tcp_init_tso_segs(sk, skb, cur_mss);
+
+ if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
+ return 0;
+
+ cwnd_quota = tcp_cwnd_test(tp, skb);
+ if (cwnd_quota &&
+ !tcp_snd_wnd_test(tp, skb, cur_mss))
+ cwnd_quota = 0;
+
+ return cwnd_quota;
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+{
+ struct sk_buff *skb = sk->sk_send_head;
+
+ return (skb &&
+ tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+ (tcp_skb_is_last(sk, skb) ?
+ TCP_NAGLE_PUSH :
+ tp->nonagle)));
+}
+
+/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
+ * which is put after SKB on the list. It is very much like
+ * tcp_fragment() except that it may make several kinds of assumptions
+ * in order to speed up the splitting operation. In particular, we
+ * know that all the data is in scatter-gather pages, and that the
+ * packet has never been sent out before (and thus is not cloned).
+ */
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
+{
+ struct sk_buff *buff;
+ int nlen = skb->len - len;
+ u16 flags;
+
+ /* All of a TSO frame must be composed of paged data. */
+ if (skb->len != skb->data_len)
+ return tcp_fragment(sk, skb, len, mss_now);
+
+ buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+ if (unlikely(buff == NULL))
+ return -ENOMEM;
+
+ buff->truesize = nlen;
+ skb->truesize -= nlen;
+
+ /* Correct the sequence numbers. */
+ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+ TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
+ /* PSH and FIN should only be set in the second packet. */
+ flags = TCP_SKB_CB(skb)->flags;
+ TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+ TCP_SKB_CB(buff)->flags = flags;
+
+ /* This packet was never sent out yet, so no SACK bits. */
+ TCP_SKB_CB(buff)->sacked = 0;
+
+ buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+ skb_split(skb, buff, len);
+
+ /* Fix up tso_factor for both original and new SKB. */
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tcp_set_skb_tso_segs(sk, buff, mss_now);
+
+ /* Link BUFF into the send queue. */
+ skb_header_release(buff);
+ __skb_append(skb, buff, &sk->sk_write_queue);
+
+ return 0;
+}
+
+/* Try to defer sending, if possible, in order to minimize the amount
+ * of TSO splitting we do. View it as a kind of TSO Nagle test.
+ *
+ * This algorithm is from John Heffner.
+ */
+static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 send_win, cong_win, limit, in_flight;
+
+ if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+ return 0;
+
+ if (icsk->icsk_ca_state != TCP_CA_Open)
+ return 0;
+
+ in_flight = tcp_packets_in_flight(tp);
+
+ BUG_ON(tcp_skb_pcount(skb) <= 1 ||
+ (tp->snd_cwnd <= in_flight));
+
+ send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+
+ /* From in_flight test above, we know that cwnd > in_flight. */
+ cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+
+ limit = min(send_win, cong_win);
+
+ if (sysctl_tcp_tso_win_divisor) {
+ u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+
+ /* If at least some fraction of a window is available,
+ * just use it.
+ */
+ chunk /= sysctl_tcp_tso_win_divisor;
+ if (limit >= chunk)
+ return 0;
+ } else {
+ /* Different approach, try not to defer past a single
+ * ACK. Receiver should ACK every other full sized
+ * frame, so if we have space for more than 3 frames
+ * then send now.
+ */
+ if (limit > tcp_max_burst(tp) * tp->mss_cache)
+ return 0;
+ }
+
+ /* Ok, it looks like it is advisable to defer. */
+ return 1;