Merge master.kernel.org:/pub/scm/linux/kernel/git/gregkh/driver-2.6
[powerpc.git] / net / core / skbuff.c
index ef498cb..d0732e9 100644 (file)
@@ -68,7 +68,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
 /*
  *     Keep out-of-line to prevent kernel bloat.
@@ -118,9 +119,11 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *     alloc_skb       -       allocate a network buffer
+ *     __alloc_skb     -       allocate a network buffer
  *     @size: size to allocate
  *     @gfp_mask: allocation mask
+ *     @fclone: allocate from fclone cache instead of head cache
+ *             and allocate a cloned (child) skb
  *
  *     Allocate a new &sk_buff. The returned buffer has no headroom and a
  *     tail room of size bytes. The object has a reference count of one.
@@ -129,14 +132,16 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *     Buffers may only be allocated from interrupts using a @gfp_mask of
  *     %GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+                           int fclone)
 {
+       struct skb_shared_info *shinfo;
        struct sk_buff *skb;
        u8 *data;
 
        /* Get the HEAD */
-       skb = kmem_cache_alloc(skbuff_head_cache,
-                              gfp_mask & ~__GFP_DMA);
+       skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache,
+                               gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;
 
@@ -153,12 +158,25 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
-
-       atomic_set(&(skb_shinfo(skb)->dataref), 1);
-       skb_shinfo(skb)->nr_frags  = 0;
-       skb_shinfo(skb)->tso_size = 0;
-       skb_shinfo(skb)->tso_segs = 0;
-       skb_shinfo(skb)->frag_list = NULL;
+       /* make sure we initialize shinfo sequentially */
+       shinfo = skb_shinfo(skb);
+       atomic_set(&shinfo->dataref, 1);
+       shinfo->nr_frags  = 0;
+       shinfo->tso_size = 0;
+       shinfo->tso_segs = 0;
+       shinfo->ufo_size = 0;
+       shinfo->ip6_frag_id = 0;
+       shinfo->frag_list = NULL;
+
+       if (fclone) {
+               struct sk_buff *child = skb + 1;
+               atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+               skb->fclone = SKB_FCLONE_ORIG;
+               atomic_set(fclone_ref, 1);
+
+               child->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
 out:
        return skb;
 nodata:
@@ -183,7 +201,7 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size,
-                                    unsigned int __nocast gfp_mask)
+                                    gfp_t gfp_mask)
 {
        struct sk_buff *skb;
        u8 *data;
@@ -266,8 +284,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+       struct sk_buff *other;
+       atomic_t *fclone_ref;
+
        skb_release_data(skb);
-       kmem_cache_free(skbuff_head_cache, skb);
+       switch (skb->fclone) {
+       case SKB_FCLONE_UNAVAILABLE:
+               kmem_cache_free(skbuff_head_cache, skb);
+               break;
+
+       case SKB_FCLONE_ORIG:
+               fclone_ref = (atomic_t *) (skb + 2);
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, skb);
+               break;
+
+       case SKB_FCLONE_CLONE:
+               fclone_ref = (atomic_t *) (skb + 1);
+               other = skb - 1;
+
+               /* The clone portion is available for
+                * fast-cloning again.
+                */
+               skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+               if (atomic_dec_and_test(fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, other);
+               break;
+       };
 }
 
 /**
@@ -291,6 +335,9 @@ void __kfree_skb(struct sk_buff *skb)
        }
 #ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
 #endif
@@ -320,18 +367,28 @@ void __kfree_skb(struct sk_buff *skb)
  *     %GFP_ATOMIC.
  */
 
-struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-       struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-       if (!n) 
-               return NULL;
+       struct sk_buff *n;
+
+       n = skb + 1;
+       if (skb->fclone == SKB_FCLONE_ORIG &&
+           n->fclone == SKB_FCLONE_UNAVAILABLE) {
+               atomic_t *fclone_ref = (atomic_t *) (n + 1);
+               n->fclone = SKB_FCLONE_CLONE;
+               atomic_inc(fclone_ref);
+       } else {
+               n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+               if (!n)
+                       return NULL;
+               n->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
 
 #define C(x) n->x = skb->x
 
        n->next = n->prev = NULL;
        n->sk = NULL;
-       C(stamp);
+       C(tstamp);
        C(dev);
        C(h);
        C(nh);
@@ -359,6 +416,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
        C(nfct);
        nf_conntrack_get(skb->nfct);
        C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+       C(ipvs_property);
+#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       C(nfct_reasm);
+       nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        C(nf_bridge);
        nf_bridge_get(skb->nf_bridge);
@@ -407,14 +475,22 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
        new->mac.raw    = old->mac.raw + offset;
        memcpy(new->cb, old->cb, sizeof(old->cb));
        new->local_df   = old->local_df;
+       new->fclone     = SKB_FCLONE_UNAVAILABLE;
        new->pkt_type   = old->pkt_type;
-       new->stamp      = old->stamp;
+       new->tstamp     = old->tstamp;
        new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
        new->nfmark     = old->nfmark;
        new->nfct       = old->nfct;
        nf_conntrack_get(old->nfct);
        new->nfctinfo   = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+       new->nfct_reasm = old->nfct_reasm;
+       nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+       new->ipvs_property = old->ipvs_property;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        new->nf_bridge  = old->nf_bridge;
        nf_bridge_get(old->nf_bridge);
@@ -448,7 +524,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  *     header is going to be modified. Use pskb_copy() instead.
  */
 
-struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
        int headerlen = skb->data - skb->head;
        /*
@@ -487,7 +563,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma
  *     The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask)
+struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 {
        /*
         *      Allocate the copy buffer
@@ -546,7 +622,7 @@ out:
  */
 
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
-                    unsigned int __nocast gfp_mask)
+                    gfp_t gfp_mask)
 {
        int i;
        u8 *data;
@@ -637,7 +713,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  */
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                int newheadroom, int newtailroom,
-                               unsigned int __nocast gfp_mask)
+                               gfp_t gfp_mask)
 {
        /*
         *      Allocate the copy buffer
@@ -715,8 +791,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
                int end = offset + skb_shinfo(skb)->frags[i].size;
                if (end > len) {
                        if (skb_cloned(skb)) {
-                               if (!realloc)
-                                       BUG();
+                               BUG_ON(!realloc);
                                if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
                                        return -ENOMEM;
                        }
@@ -818,8 +893,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
                struct sk_buff *insp = NULL;
 
                do {
-                       if (!list)
-                               BUG();
+                       BUG_ON(!list);
 
                        if (list->len <= eat) {
                                /* Eaten as whole. */
@@ -1123,8 +1197,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
                        start = end;
                }
        }
-       if (len)
-               BUG();
+       BUG_ON(len);
 
        return csum;
 }
@@ -1206,8 +1279,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
                        start = end;
                }
        }
-       if (len)
-               BUG();
+       BUG_ON(len);
        return csum;
 }
 
@@ -1221,8 +1293,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
        else
                csstart = skb_headlen(skb);
 
-       if (csstart > skb_headlen(skb))
-               BUG();
+       BUG_ON(csstart > skb_headlen(skb));
 
        memcpy(to, skb->data, csstart);
 
@@ -1636,6 +1707,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
        return textsearch_find(config, state);
 }
 
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock  structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns  -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+                       int (*getfrag)(void *from, char *to, int offset,
+                                       int len, int odd, struct sk_buff *skb),
+                       void *from, int length)
+{
+       int frg_cnt = 0;
+       skb_frag_t *frag = NULL;
+       struct page *page = NULL;
+       int copy, left;
+       int offset = 0;
+       int ret;
+
+       do {
+               /* Return error if we don't have space for new frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               if (frg_cnt >= MAX_SKB_FRAGS)
+                       return -EFAULT;
+
+               /* allocate a new page for next frag */
+               page = alloc_pages(sk->sk_allocation, 0);
+
+               /* If alloc_page fails just return failure and caller will
+                * free previous allocated pages by doing kfree_skb()
+                */
+               if (page == NULL)
+                       return -ENOMEM;
+
+               /* initialize the next frag */
+               sk->sk_sndmsg_page = page;
+               sk->sk_sndmsg_off = 0;
+               skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+               skb->truesize += PAGE_SIZE;
+               atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+               /* get the new initialized frag */
+               frg_cnt = skb_shinfo(skb)->nr_frags;
+               frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+               /* copy the user data to page */
+               left = PAGE_SIZE - frag->page_offset;
+               copy = (length > left)? left : length;
+
+               ret = getfrag(from, (page_address(frag->page) +
+                           frag->page_offset + frag->size),
+                           offset, copy, 0, skb);
+               if (ret < 0)
+                       return -EFAULT;
+
+               /* copy was successful so update the size parameters */
+               sk->sk_sndmsg_off += copy;
+               frag->size += copy;
+               skb->len += copy;
+               skb->data_len += copy;
+               offset += copy;
+               length -= copy;
+
+       } while (length > 0);
+
+       return 0;
+}
+
 void __init skb_init(void)
 {
        skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1645,12 +1788,21 @@ void __init skb_init(void)
                                              NULL, NULL);
        if (!skbuff_head_cache)
                panic("cannot create skbuff cache");
+
+       skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+                                               (2*sizeof(struct sk_buff)) +
+                                               sizeof(atomic_t),
+                                               0,
+                                               SLAB_HWCACHE_ALIGN,
+                                               NULL, NULL);
+       if (!skbuff_fclone_cache)
+               panic("cannot create skbuff cache");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
@@ -1678,3 +1830,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);