X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp.c;h=a6b228914b8ef08d38262c9cbd42a2e61bf0b103;hb=6ce61a7c2678800cfe59a5f4a41ce8f785b9d355;hp=a97450e9eb34692558246464101ff58259aadda6;hpb=93fdf10d4c28edaa1b9f80e7f9c3002359186d00;p=powerpc.git diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a97450e9eb..a6b228914b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -258,6 +258,7 @@ #include #include #include +#include #include #include @@ -268,7 +269,7 @@ #include #include -int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; +int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; @@ -462,11 +463,12 @@ static inline int forced_push(struct tcp_sock *tp) static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { - skb->csum = 0; - TCP_SKB_CB(skb)->seq = tp->write_seq; - TCP_SKB_CB(skb)->end_seq = tp->write_seq; - TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = 0; + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + skb->csum = 0; + tcb->seq = tcb->end_seq = tp->write_seq; + tcb->flags = TCPCB_FLAG_ACK; + tcb->sacked = 0; skb_header_release(skb); __skb_queue_tail(&sk->sk_write_queue, skb); sk_charge_skb(sk, skb); @@ -568,7 +570,7 @@ new_segment: skb->truesize += copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -642,7 +644,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp) int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { - if (sk->sk_route_caps & NETIF_F_TSO) + if (sk_can_gso(sk)) tmp = 0; else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -723,7 +725,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, tp, skb); copy = size_goal; @@ -955,8 +957,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -1132,7 +1137,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else @@ -1659,7 +1664,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; @@ -1938,6 +1944,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; +#ifdef CONFIG_TCP_MD5SIG + case TCP_MD5SIG: + /* Read the IP->Key mappings from userspace */ + err = tp->af_specific->md5_parse(sk, optval, optlen); + break; +#endif + default: err = -ENOPROTOOPT; break; @@ -2150,7 +2163,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) struct tcphdr *th; unsigned thlen; unsigned int seq; - unsigned int delta; + __be32 delta; unsigned int oldlen; unsigned int len; @@ -2165,13 +2178,30 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (!pskb_may_pull(skb, thlen)) goto out; - segs = NULL; - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) - goto out; - oldlen = (u16)~skb->len; __skb_pull(skb, thlen); + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + int mss; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + mss = skb_shinfo(skb)->gso_size; + skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + + segs = NULL; + goto out; + } + segs = skb_segment(skb, features); if (IS_ERR(segs)) goto out; @@ -2186,8 +2216,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) do { th->fin = th->psh = 0; - th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2200,14 +2231,145 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) } while (skb->next); delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); - th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); out: return segs; } +EXPORT_SYMBOL(tcp_tso_segment); + +#ifdef CONFIG_TCP_MD5SIG +static unsigned long tcp_md5sig_users; +static struct tcp_md5sig_pool **tcp_md5sig_pool; +static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); + +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) +{ + int cpu; + for_each_possible_cpu(cpu) { + struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); + if (p) { + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); + kfree(p); + p = NULL; + } + } + free_percpu(pool); +} + +void tcp_free_md5sig_pool(void) +{ + struct tcp_md5sig_pool **pool = NULL; + + spin_lock(&tcp_md5sig_pool_lock); + if (--tcp_md5sig_users == 0) { + pool = tcp_md5sig_pool; + tcp_md5sig_pool = NULL; + } + spin_unlock(&tcp_md5sig_pool_lock); + if (pool) + __tcp_free_md5sig_pool(pool); +} + +EXPORT_SYMBOL(tcp_free_md5sig_pool); + +struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) +{ + int cpu; + struct tcp_md5sig_pool **pool; + + pool = alloc_percpu(struct tcp_md5sig_pool *); + if (!pool) + return NULL; + + for_each_possible_cpu(cpu) { + struct tcp_md5sig_pool *p; + struct crypto_hash *hash; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + goto out_free; + *per_cpu_ptr(pool, cpu) = p; + + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (!hash || IS_ERR(hash)) + goto out_free; + + p->md5_desc.tfm = hash; + } + return pool; +out_free: + __tcp_free_md5sig_pool(pool); + return NULL; +} + +struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) +{ + struct tcp_md5sig_pool **pool; + int alloc = 0; + +retry: + spin_lock(&tcp_md5sig_pool_lock); + pool = tcp_md5sig_pool; + if (tcp_md5sig_users++ == 0) { + alloc = 1; + spin_unlock(&tcp_md5sig_pool_lock); + } else if (!pool) { + tcp_md5sig_users--; + spin_unlock(&tcp_md5sig_pool_lock); + cpu_relax(); + goto retry; + } else + spin_unlock(&tcp_md5sig_pool_lock); + + if (alloc) { + /* we cannot hold spinlock here because this may sleep. */ + struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); + spin_lock(&tcp_md5sig_pool_lock); + if (!p) { + tcp_md5sig_users--; + spin_unlock(&tcp_md5sig_pool_lock); + return NULL; + } + pool = tcp_md5sig_pool; + if (pool) { + /* oops, it has already been assigned. */ + spin_unlock(&tcp_md5sig_pool_lock); + __tcp_free_md5sig_pool(p); + } else { + tcp_md5sig_pool = pool = p; + spin_unlock(&tcp_md5sig_pool_lock); + } + } + return pool; +} + +EXPORT_SYMBOL(tcp_alloc_md5sig_pool); + +struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) +{ + struct tcp_md5sig_pool **p; + spin_lock(&tcp_md5sig_pool_lock); + p = tcp_md5sig_pool; + if (p) + tcp_md5sig_users++; + spin_unlock(&tcp_md5sig_pool_lock); + return (p ? *per_cpu_ptr(p, cpu) : NULL); +} + +EXPORT_SYMBOL(__tcp_get_md5sig_pool); + +void __tcp_put_md5sig_pool(void) { + __tcp_free_md5sig_pool(tcp_md5sig_pool); +} + +EXPORT_SYMBOL(__tcp_put_md5sig_pool); +#endif extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; @@ -2235,9 +2397,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!tcp_hashinfo.bind_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); /* Size and allocate the main established and bind bucket * hash tables. @@ -2250,7 +2410,7 @@ void __init tcp_init(void) thash_entries, (num_physpages >= 128 * 1024) ? 13 : 15, - HASH_HIGHMEM, + 0, &tcp_hashinfo.ehash_size, NULL, 0); @@ -2266,7 +2426,7 @@ void __init tcp_init(void) tcp_hashinfo.ehash_size, (num_physpages >= 128 * 1024) ? 13 : 15, - HASH_HIGHMEM, + 0, &tcp_hashinfo.bhash_size, NULL, 64 * 1024); @@ -2296,9 +2456,10 @@ void __init tcp_init(void) sysctl_max_syn_backlog = 128; } - sysctl_tcp_mem[0] = 768 << order; - sysctl_tcp_mem[1] = 1024 << order; - sysctl_tcp_mem[2] = 1536 << order; + /* Allow no more than 3/4 kernel memory (usually less) allocated to TCP */ + sysctl_tcp_mem[0] = (1536 / sizeof (struct inet_bind_hashbucket)) << order; + sysctl_tcp_mem[1] = sysctl_tcp_mem[0] * 4 / 3; + sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit);