import of ftp.dlink.com/GPL/DSMG-600_reB/ppclinux.tar.gz
[linux-2.4.21-pre4.git] / net / packet / af_packet.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Version:     $Id: af_packet.c,v 1.1.1.1 2005/04/11 02:51:14 jack Exp $
9  *
10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *
14  * Fixes:       
15  *              Alan Cox        :       verify_area() now used correctly
16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
17  *              Alan Cox        :       tidied skbuff lists.
18  *              Alan Cox        :       Now uses generic datagram routines I
19  *                                      added. Also fixed the peek/read crash
20  *                                      from all old Linux datagram code.
21  *              Alan Cox        :       Uses the improved datagram code.
22  *              Alan Cox        :       Added NULL's for socket options.
23  *              Alan Cox        :       Re-commented the code.
24  *              Alan Cox        :       Use new kernel side addressing
25  *              Rob Janssen     :       Correct MTU usage.
26  *              Dave Platt      :       Counter leaks caused by incorrect
27  *                                      interrupt locking and some slightly
28  *                                      dubious gcc output. Can you read
29  *                                      compiler: it said _VOLATILE_
30  *      Richard Kooijman        :       Timestamp fixes.
31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
32  *              Alan Cox        :       sendmsg/recvmsg support.
33  *              Alan Cox        :       Protocol setting support
34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
36  *      Michal Ostrowski        :       Module initialization cleanup.
37  *
38  *              This program is free software; you can redistribute it and/or
39  *              modify it under the terms of the GNU General Public License
40  *              as published by the Free Software Foundation; either version
41  *              2 of the License, or (at your option) any later version.
42  *
43  */
44  
45 #include <linux/config.h>
46 #include <linux/types.h>
47 #include <linux/sched.h>
48 #include <linux/mm.h>
49 #include <linux/fcntl.h>
50 #include <linux/socket.h>
51 #include <linux/in.h>
52 #include <linux/inet.h>
53 #include <linux/netdevice.h>
54 #include <linux/if_packet.h>
55 #include <linux/wireless.h>
56 #include <linux/kmod.h>
57 #include <net/ip.h>
58 #include <net/protocol.h>
59 #include <linux/skbuff.h>
60 #include <net/sock.h>
61 #include <linux/errno.h>
62 #include <linux/timer.h>
63 #include <asm/system.h>
64 #include <asm/uaccess.h>
65 #include <asm/ioctls.h>
66 #include <linux/proc_fs.h>
67 #include <linux/poll.h>
68 #include <linux/module.h>
69 #include <linux/init.h>
70 #include <linux/if_bridge.h>
71
72 #ifdef CONFIG_NET_DIVERT
73 #include <linux/divert.h>
74 #endif /* CONFIG_NET_DIVERT */
75
76 #ifdef CONFIG_INET
77 #include <net/inet_common.h>
78 #endif
79
80 #ifdef CONFIG_DLCI
81 extern int dlci_ioctl(unsigned int, void*);
82 #endif
83
84 #define CONFIG_SOCK_PACKET      1
85
86 /*
87    Proposed replacement for SIOC{ADD,DEL}MULTI and
88    IFF_PROMISC, IFF_ALLMULTI flags.
89
   It is more expensive, but I believe it is a really correct
   solution: reentrant, safe and fault tolerant.
92
93    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
94    reference count and global flag, so that real status is
95    (gflag|(count != 0)), so that we can use obsolete faulty interface
96    not harming clever users.
97  */
98 #define CONFIG_PACKET_MULTICAST 1
99
100 /*
101    Assumptions:
102    - if device has no dev->hard_header routine, it adds and removes ll header
103      inside itself. In this case ll header is invisible outside of device,
104      but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the
     header will not fit into the reserved space (tunnel); other
     ones are silly (PPP).
108    - packet socket receives packets with pulled ll header,
109      so that SOCK_RAW should push it back.
110
111 On receive:
112 -----------
113
114 Incoming, dev->hard_header!=NULL
115    mac.raw -> ll header
116    data    -> data
117
118 Outgoing, dev->hard_header!=NULL
119    mac.raw -> ll header
120    data    -> ll header
121
122 Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the
              ll header. PPP does this, which is wrong, because it
              introduces asymmetry between the rx and tx paths.
126    data    -> data
127
128 Outgoing, dev->hard_header==NULL
129    mac.raw -> data. ll header is still not built!
130    data    -> data
131
132 Resume
133   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
134
135
136 On transmit:
137 ------------
138
139 dev->hard_header != NULL
140    mac.raw -> ll header
141    data    -> ll header
142
143 dev->hard_header == NULL (ll header is added by device, we cannot control it)
144    mac.raw -> data
145    data -> data
146
   We should set nh.raw on output to the correct position,
148    packet classifier depends on it.
149  */
150
151 /* List of all packet sockets. */
152 static struct sock * packet_sklist;
153 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
154
155 atomic_t packet_socks_nr;
156
157
158 /* Private packet socket structures. */
159
160 #ifdef CONFIG_PACKET_MULTICAST
161 struct packet_mclist
162 {
163         struct packet_mclist    *next;
164         int                     ifindex;
165         int                     count;
166         unsigned short          type;
167         unsigned short          alen;
168         unsigned char           addr[8];
169 };
170 #endif
171 #ifdef CONFIG_PACKET_MMAP
172 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
173 #endif
174
175 static void packet_flush_mclist(struct sock *sk);
176
177 struct packet_opt
178 {
179         struct packet_type      prot_hook;
180         spinlock_t              bind_lock;
181         char                    running;        /* prot_hook is attached*/
182         int                     ifindex;        /* bound device         */
183         struct tpacket_stats    stats;
184 #ifdef CONFIG_PACKET_MULTICAST
185         struct packet_mclist    *mclist;
186 #endif
187 #ifdef CONFIG_PACKET_MMAP
188         atomic_t                mapped;
189         unsigned long           *pg_vec;
190         unsigned int            pg_vec_order;
191         unsigned int            pg_vec_pages;
192         unsigned int            pg_vec_len;
193
194         struct tpacket_hdr      **iovec;
195         unsigned int            frame_size;
196         unsigned int            iovmax;
197         unsigned int            head;
198         int                     copy_thresh;
199 #endif
200 };
201
202 void packet_sock_destruct(struct sock *sk)
203 {
204         BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
205         BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
206
207         if (!sk->dead) {
208                 printk("Attempt to release alive packet socket: %p\n", sk);
209                 return;
210         }
211
212         if (sk->protinfo.destruct_hook)
213                 kfree(sk->protinfo.destruct_hook);
214         atomic_dec(&packet_socks_nr);
215 #ifdef PACKET_REFCNT_DEBUG
216         printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
217 #endif
218         MOD_DEC_USE_COUNT;
219 }
220
221
222 extern struct proto_ops packet_ops;
223
224 #ifdef CONFIG_SOCK_PACKET
225 extern struct proto_ops packet_ops_spkt;
226
227 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
228 {
229         struct sock *sk;
230         struct sockaddr_pkt *spkt;
231
232         /*
233          *      When we registered the protocol we saved the socket in the data
234          *      field for just this event.
235          */
236
237         sk = (struct sock *) pt->data;
238         
239         /*
240          *      Yank back the headers [hope the device set this
241          *      right or kerboom...]
242          *
243          *      Incoming packets have ll header pulled,
244          *      push it back.
245          *
246          *      For outgoing ones skb->data == skb->mac.raw
247          *      so that this procedure is noop.
248          */
249
250         if (skb->pkt_type == PACKET_LOOPBACK)
251                 goto out;
252
253         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
254                 goto oom;
255
256         spkt = (struct sockaddr_pkt*)skb->cb;
257
258         skb_push(skb, skb->data-skb->mac.raw);
259
260         /*
261          *      The SOCK_PACKET socket receives _all_ frames.
262          */
263
264         spkt->spkt_family = dev->type;
265         strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
266         spkt->spkt_protocol = skb->protocol;
267
268         /*
269          *      Charge the memory to the socket. This is done specifically
270          *      to prevent sockets using all the memory up.
271          */
272
273         if (sock_queue_rcv_skb(sk,skb) == 0)
274                 return 0;
275
276 out:
277         kfree_skb(skb);
278 oom:
279         return 0;
280 }
281
282
283 /*
284  *      Output a raw packet to a device layer. This bypasses all the other
285  *      protocol layers and you must therefore supply it with a complete frame
286  */
287  
/*
 *	sendmsg() for SOCK_PACKET sockets.  A destination address naming
 *	the device is mandatory; the caller supplies the complete frame
 *	(link-level header included).  Returns len on success or a
 *	negative errno (-ENOTCONN, -EINVAL, -ENODEV, -EMSGSIZE, -ENOBUFS,
 *	-EFAULT, -ENETDOWN).
 */
static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
			       struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto=0;
	int err;
	
	/*
	 *	Get and verify the address.  Only a full sockaddr_pkt
	 *	carries a protocol; a bare sockaddr leaves proto == 0.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it 
	 */

	/* 13 is presumably sizeof(spkt_device)-1 — forces NUL termination
	 * of the (kernel copy of the) user-supplied name before lookup.
	 * TODO confirm against struct sockaddr_pkt. */
	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);	/* takes a device reference */
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;
	
	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */
	 
	err = -EMSGSIZE;
	if(len>dev->mtu+dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */
	 
	if (skb == NULL) 
		goto out_unlock;

	/*
	 *	Fill it in 
	 */
	 
	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb,(dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	/* Try to align data part correctly: the user frame already contains
	 * the ll header, so back data/tail up over the reserved header room. */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb->nh.raw = skb->data;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;
	if (err)
		goto out_free;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it.  dev_queue_xmit() consumes the skb regardless
	 *	of outcome; only the device reference remains to drop.
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
389 #endif
390
391 /*
392    This function makes lazy skb cloning in hope that most of packets
393    are discarded by BPF.
394
395    Note tricky part: we DO mangle shared skb! skb->data, skb->len
396    and skb->cb are mangled. It works because (and until) packets
397    falling here are owned by current CPU. Output packets are cloned
398    by dev_queue_xmit_nit(), input packets are processed by net_bh
399    sequencially, so that if we return skb to original state on exit,
400    we will not harm anyone.
401  */
402
/*
 *	Receive handler for AF_PACKET sockets (non-mmap path).  Per the
 *	comment above: the skb may be shared, and data/len are mangled in
 *	place and restored (drop_n_restore) if the frame is filtered out,
 *	so lazy cloning only happens for frames actually delivered.
 *	Always returns 0.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_opt *po;
	u8 * skb_head = skb->data;	/* saved for restore on drop */
	int skb_len = skb->len;
#ifdef CONFIG_FILTER
	unsigned snaplen;
#endif

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides the details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

#ifdef CONFIG_FILTER
	snaplen = skb->len;

	/* Run the attached BPF filter; 0 means discard, otherwise the
	 * result caps how many bytes are kept (snap length). */
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif /* CONFIG_FILTER */

	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
		goto drop_n_acct;

	/* Frame will be delivered: now take a private clone if shared,
	 * restoring the original skb before releasing it. */
	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* Build the address recvmsg() will return, in the skb control block. */
	sll = (struct sockaddr_ll*)skb->cb;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

#ifdef CONFIG_FILTER
	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;
#endif

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->receive_queue, skb);
	spin_unlock(&sk->receive_queue.lock);
	sk->data_ready(sk,skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

#ifdef CONFIG_FILTER
drop_n_restore:
#endif
	/* Undo the header push/pull on a still-shared skb so other taps
	 * see it unmodified. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}
514
515 #ifdef CONFIG_PACKET_MMAP
/*
 *	Receive handler for mmap()ed (PACKET_RX_RING) sockets: copy the
 *	frame into the next free ring slot and publish it to user space by
 *	setting tp_status.  If the frame is too big for a slot and
 *	copy_thresh allows, a full copy is also queued for recvmsg().
 *	Always returns 0.
 */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
	struct sock *sk;
	struct packet_opt *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;	/* saved for restore on drop */
	int skb_len = skb->len;
	unsigned snaplen;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	if (dev->hard_header) {
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
			if (skb->ip_summed == CHECKSUM_HW)
				status |= TP_STATUS_CSUMNOTREADY;
		}
	}

	snaplen = skb->len;

#ifdef CONFIG_FILTER
	/* BPF filter: 0 discards, otherwise caps the snap length. */
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif

	/* Work out where the mac and net headers land inside the ring frame. */
	if (sk->type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	/* Frame does not fit in a ring slot: optionally queue a full copy
	 * (bounded by rcvbuf), then truncate what goes into the ring. */
	if (macoff + snaplen > po->frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}
	/* Only the linear part is copied below. */
	if (snaplen > skb->len-skb->data_len)
		snaplen = skb->len-skb->data_len;

	spin_lock(&sk->receive_queue.lock);
	h = po->iovec[po->head];

	/* Non-zero tp_status means user space has not consumed this slot. */
	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->iovmax ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->receive_queue.lock);

	memcpy((u8*)h + macoff, skb->data, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	h->tp_sec = skb->stamp.tv_sec;
	h->tp_usec = skb->stamp.tv_usec;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	/* Publish: the barrier orders the data writes above before the
	 * status store user space polls on. */
	h->tp_status = status;
	mb();

	{
		/* Make the new frame visible on non-coherent caches for the
		 * pages user space has mapped. */
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->data_ready(sk, 0);

drop_n_restore:
	/* Undo the header push/pull on a still-shared skb. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

	/* Wake the reader anyway so it can drain the ring. */
	sk->data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
661
662 #endif
663
664
/*
 *	sendmsg() for AF_PACKET sockets.  With no address, the socket's
 *	bound device and protocol are used; otherwise a sockaddr_ll
 *	selects them.  For SOCK_RAW the payload already contains the ll
 *	header; for SOCK_DGRAM dev->hard_header() builds it.  Returns len
 *	on success or a negative errno.
 */
static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			  struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address. 
	 */
	 
	if (saddr == NULL) {
		ifindex	= sk->protinfo.af_packet->ifindex;
		proto	= sk->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);	/* takes a device reference */
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW frames include the ll header, so allow for it in the
	 * size check. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 
				msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out_unlock;

	skb_reserve(skb, (dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			/* SOCK_RAW: discard the header hard_header() built;
			 * the user supplies the whole frame below. */
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it.  dev_queue_xmit() consumes the skb even on
	 *	failure, so the error path goes to out_unlock, not out_free.
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
757
758 /*
759  *      Close a PACKET socket. This is fairly simple. We immediately go
760  *      to 'closed' state and remove our protocol entry in the device list.
761  */
762
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 *	Each step that removes a reference to the sock (sklist unlink,
 *	prot_hook detach, final sock_put) drops the matching refcount.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct sock **skp;

	if (!sk)
		return 0;

	/* Unlink from the global socket list. */
	write_lock_bh(&packet_sklist_lock);
	for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
		if (*skp == sk) {
			*skp = sk->next;
			__sock_put(sk);	/* list held a reference */
			break;
		}
	}
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (sk->protinfo.af_packet->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		sk->protinfo.af_packet->running = 0;
		__sock_put(sk);	/* hook held a reference */
	}

#ifdef CONFIG_PACKET_MULTICAST
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	/* Tear down the rx ring: a zeroed request with closing=1 frees it. */
	if (sk->protinfo.af_packet->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->receive_queue);

	sock_put(sk);
	return 0;
}
820
821 /*
822  *      Attach a packet hook.
823  */
824
/*
 *	Attach the socket's protocol hook for (dev, protocol).  dev may be
 *	NULL (bind to all devices) and protocol may be 0 (detach only).
 *	The running flag tracks whether prot_hook is registered; each
 *	attach holds a sock reference, each detach drops one.  Always
 *	returns 0.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&sk->protinfo.af_packet->bind_lock);
	if (sk->protinfo.af_packet->running) {
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		__sock_put(sk);
		sk->protinfo.af_packet->running = 0;
	}

	sk->num = protocol;
	sk->protinfo.af_packet->prot_hook.type = protocol;
	sk->protinfo.af_packet->prot_hook.dev = dev;

	sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;

	/* protocol 0: leave the hook detached. */
	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&sk->protinfo.af_packet->prot_hook);
			sock_hold(sk);
			sk->protinfo.af_packet->running = 1;
		} else {
			/* Device is down: report ENETDOWN asynchronously
			 * rather than failing the bind. */
			sk->err = ENETDOWN;
			if (!sk->dead)
				sk->error_report(sk);
		}
	} else {
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);
		sk->protinfo.af_packet->running = 1;
	}

out_unlock:
	spin_unlock(&sk->protinfo.af_packet->bind_lock);
	release_sock(sk);
	return 0;
}
870
871 /*
872  *      Bind a packet socket to a device
873  */
874
875 #ifdef CONFIG_SOCK_PACKET
876
877 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
878 {
879         struct sock *sk=sock->sk;
880         char name[15];
881         struct net_device *dev;
882         int err = -ENODEV;
883         
884         /*
885          *      Check legality
886          */
887          
888         if(addr_len!=sizeof(struct sockaddr))
889                 return -EINVAL;
890         strncpy(name,uaddr->sa_data,14);
891         name[14]=0;
892
893         dev = dev_get_by_name(name);
894         if (dev) {
895                 err = packet_do_bind(sk, dev, sk->num);
896                 dev_put(dev);
897         }
898         return err;
899 }
900 #endif
901
902 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
903 {
904         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
905         struct sock *sk=sock->sk;
906         struct net_device *dev = NULL;
907         int err;
908
909
910         /*
911          *      Check legality
912          */
913          
914         if (addr_len < sizeof(struct sockaddr_ll))
915                 return -EINVAL;
916         if (sll->sll_family != AF_PACKET)
917                 return -EINVAL;
918
919         if (sll->sll_ifindex) {
920                 err = -ENODEV;
921                 dev = dev_get_by_index(sll->sll_ifindex);
922                 if (dev == NULL)
923                         goto out;
924         }
925         err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
926         if (dev)
927                 dev_put(dev);
928
929 out:
930         return err;
931 }
932
933
934 /*
935  *      Create a packet of type SOCK_PACKET. 
936  */
937
/*
 *	socket() handler for PF_PACKET: allocates the sock and its private
 *	packet_opt, installs the protocol hook (immediately registered if
 *	a non-zero protocol was requested) and links the sock into the
 *	global list.  Requires CAP_NET_RAW.  Returns 0, -EPERM,
 *	-ESOCKTNOSUPPORT or -ENOBUFS.
 */
static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;
	MOD_INC_USE_COUNT;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
	if (sk == NULL)
		goto out;

	/* SOCK_PACKET sockets get the legacy ops/receive handler. */
	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock,sk);

	sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
	if (sk->protinfo.af_packet == NULL)
		goto out_free;
	memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
	sk->family = PF_PACKET;
	sk->num = protocol;

	/* packet_sock_destruct frees af_packet when the last ref drops. */
	sk->destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&sk->protinfo.af_packet->bind_lock);
	sk->protinfo.af_packet->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
#endif
	sk->protinfo.af_packet->prot_hook.data = (void *)sk;

	if (protocol) {
		sk->protinfo.af_packet->prot_hook.type = protocol;
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);	/* reference held by the hook */
		sk->protinfo.af_packet->running = 1;
	}

	/* Link into the global socket list; the list holds a reference. */
	write_lock_bh(&packet_sklist_lock);
	sk->next = packet_sklist;
	packet_sklist = sk;
	sock_hold(sk);
	write_unlock_bh(&packet_sklist_lock);
	return(0);

out_free:
	sk_free(sk);
out:
	MOD_DEC_USE_COUNT;
	return err;
}
1009
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
			  int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;

	/* Only PEEK/DONTWAIT/TRUNC are meaningful for packet sockets. */
	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (sk->protinfo.af_packet->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sizeof(struct sockaddr_ll);

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram() 
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if(skb==NULL)
		goto out;

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied=len;
		msg->msg_flags|=MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	/* NOTE(review): the sender address is assumed to have been stashed
	 * in skb->cb by the receive hook (not visible here).  msg_namelen
	 * bytes are copied verbatim, so any part of the sockaddr the hook
	 * did not initialize would leak kernel memory to userspace --
	 * confirm the cb area is fully filled for both socket types. */
	if (msg->msg_name)
		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 *
	 *	With MSG_TRUNC the caller gets the packet's real length,
	 *	not the (possibly smaller) copied length.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
1094
1095 #ifdef CONFIG_SOCK_PACKET
1096 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1097                                int *uaddr_len, int peer)
1098 {
1099         struct net_device *dev;
1100         struct sock *sk = sock->sk;
1101
1102         if (peer)
1103                 return -EOPNOTSUPP;
1104
1105         uaddr->sa_family = AF_PACKET;
1106         dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1107         if (dev) {
1108                 strncpy(uaddr->sa_data, dev->name, 15);
1109                 dev_put(dev);
1110         } else
1111                 memset(uaddr->sa_data, 0, 14);
1112         *uaddr_len = sizeof(*uaddr);
1113
1114         return 0;
1115 }
1116 #endif
1117
/*
 *	Fill in a sockaddr_ll describing the local binding of a
 *	SOCK_DGRAM/SOCK_RAW packet socket: bound ifindex, protocol and
 *	(if the device still exists) its hardware type and address.
 */
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	/* No connection semantics, hence no peer address. */
	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
	sll->sll_protocol = sk->num;
	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		/* Device went away since the bind. */
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = sizeof(*sll);

	return 0;
}
1145
1146 #ifdef CONFIG_PACKET_MULTICAST
1147 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1148 {
1149         switch (i->type) {
1150         case PACKET_MR_MULTICAST:
1151                 if (what > 0)
1152                         dev_mc_add(dev, i->addr, i->alen, 0);
1153                 else
1154                         dev_mc_delete(dev, i->addr, i->alen, 0);
1155                 break;
1156         case PACKET_MR_PROMISC:
1157                 dev_set_promiscuity(dev, what);
1158                 break;
1159         case PACKET_MR_ALLMULTI:
1160                 dev_set_allmulti(dev, what);
1161                 break;
1162         default:;
1163         }
1164 }
1165
1166 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1167 {
1168         for ( ; i; i=i->next) {
1169                 if (i->ifindex == dev->ifindex)
1170                         packet_dev_mc(dev, i, what);
1171         }
1172 }
1173
/*
 *	Add a multicast/promisc/allmulti membership for this socket
 *	(PACKET_ADD_MEMBERSHIP).  Duplicate requests are reference
 *	counted on the existing list entry rather than re-applied to the
 *	device.  Runs under the RTNL.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	/* Allocate before scanning so the list is never left
	 * half-updated on allocation failure. */
	err = -ENOBUFS;
	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			/* Already a member: just bump the refcount. */
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = sk->protinfo.af_packet->mclist;
	sk->protinfo.af_packet->mclist = i;
	/* Push the new membership down to the device. */
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}
1222
/*
 *	Drop one reference on a membership (PACKET_DROP_MEMBERSHIP);
 *	when the count hits zero the entry is unlinked, reverted on the
 *	device (if it still exists) and freed.  Runs under the RTNL.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				/* Device may already be gone; only revert
				 * on it when the lookup still succeeds. */
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	/* No matching membership was found. */
	return -EADDRNOTAVAIL;
}
1251
1252 static void packet_flush_mclist(struct sock *sk)
1253 {
1254         struct packet_mclist *ml;
1255
1256         if (sk->protinfo.af_packet->mclist == NULL)
1257                 return;
1258
1259         rtnl_lock();
1260         while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1261                 struct net_device *dev;
1262                 sk->protinfo.af_packet->mclist = ml->next;
1263                 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1264                         packet_dev_mc(dev, ml, -1);
1265                         dev_put(dev);
1266                 }
1267                 kfree(ml);
1268         }
1269         rtnl_unlock();
1270 }
1271 #endif
1272
/*
 *	SOL_PACKET setsockopt(): membership management and, with mmap
 *	support, receive-ring configuration.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
{
	struct sock *sk = sock->sk;
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
	case PACKET_ADD_MEMBERSHIP:	
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq mreq;
		if (optlen<sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq,optval,sizeof(mreq)))
			return -EFAULT;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
#endif
#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		/* Install (or, with a zero request, tear down) the
		 * mmap()ed receive ring. */
		struct tpacket_req req;

		if (optlen<sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req,optval,sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen!=sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val,optval,sizeof(val)))
			return -EFAULT;

		/* Presumably the packet size above which ring receive
		 * falls back to copying -- confirm against tpacket_rcv. */
		sk->protinfo.af_packet->copy_thresh = val;
		return 0;
	}
#endif
	default:
		return -ENOPROTOOPT;
	}
}
1327
/*
 *	SOL_PACKET getsockopt(): currently only PACKET_STATISTICS, which
 *	returns and atomically resets the receive counters.
 */
int packet_getsockopt(struct socket *sock, int level, int optname,
		      char *optval, int *optlen)
{
	int len;
	struct sock *sk = sock->sk;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len,optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;
		
	switch(optname) {
	case PACKET_STATISTICS:
	{
		struct tpacket_stats st;

		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* Snapshot and zero the counters under the receive-queue
		 * lock so a concurrent receive cannot be lost. */
		spin_lock_bh(&sk->receive_queue.lock);
		st = sk->protinfo.af_packet->stats;
		memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->receive_queue.lock);
		/* The reported total includes dropped packets. */
		st.tp_packets += st.tp_drops;

		if (copy_to_user(optval, &st, len))
			return -EFAULT;
		break;
	}
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
1368
1369
/*
 *	Netdevice event handler: when a bound device goes down or
 *	unregisters, detach the protocol hook and wake readers with
 *	ENETDOWN; when it comes back up, re-attach.  Memberships are
 *	mirrored on the device in both directions.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct packet_opt *po;
	struct net_device *dev = (struct net_device*)data;

	read_lock(&packet_sklist_lock);
	for (sk = packet_sklist; sk; sk = sk->next) {
		po = sk->protinfo.af_packet;

		switch (msg) {
		case NETDEV_DOWN:
		case NETDEV_UNREGISTER:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					/* Drop the hook's socket reference
					 * (taken in packet_create/NETDEV_UP)
					 * and report the outage. */
					dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->err = ENETDOWN;
					if (!sk->dead)
						sk->error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is gone for good: forget
					 * the binding entirely. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
#endif
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			/* Re-attach only sockets bound to this device with
			 * a protocol and not already running. */
			if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, +1);
#endif
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}
1422
1423
/*
 *	ioctl() handler for packet sockets: a few socket-level queries
 *	plus pass-through of device, bridge, divert and INET ioctls.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd) 
	{
		case SIOCOUTQ:
		{
			/* Bytes queued for transmit, not yet freed. */
			int amount = atomic_read(&sk->wmem_alloc);
			return put_user(amount, (int *)arg);
		}
		case SIOCINQ:
		{
			/* Length of the next packet waiting to be read
			 * (0 when the queue is empty). */
			struct sk_buff *skb;
			int amount = 0;

			spin_lock_bh(&sk->receive_queue.lock);
			skb = skb_peek(&sk->receive_queue);
			if (skb)
				amount = skb->len;
			spin_unlock_bh(&sk->receive_queue.lock);
			return put_user(amount, (int *)arg);
		}
		case FIOSETOWN:
		case SIOCSPGRP: {
			/* Set the async-I/O owner; only self, own process
			 * group, or CAP_NET_ADMIN may do this. */
			int pid;
			if (get_user(pid, (int *) arg))
				return -EFAULT; 
			if (current->pid != pid && current->pgrp != -pid && 
			    !capable(CAP_NET_ADMIN))
				return -EPERM;
			sk->proc = pid;
			break;
		}
		case FIOGETOWN:
		case SIOCGPGRP:
			return put_user(sk->proc, (int *)arg);
		case SIOCGSTAMP:
			/* Timestamp of the last received packet; ENOENT
			 * when nothing has been received yet. */
			if(sk->stamp.tv_sec==0)
				return -ENOENT;
			if (copy_to_user((void *)arg, &sk->stamp,
					 sizeof(struct timeval)))
				return -EFAULT;
			break;
		/* Generic device ioctls: forwarded to the net core. */
		case SIOCGIFFLAGS:
#ifndef CONFIG_INET
		case SIOCSIFFLAGS:
#endif
		case SIOCGIFCONF:
		case SIOCGIFMETRIC:
		case SIOCSIFMETRIC:
		case SIOCGIFMEM:
		case SIOCSIFMEM:
		case SIOCGIFMTU:
		case SIOCSIFMTU:
		case SIOCSIFLINK:
		case SIOCGIFHWADDR:
		case SIOCSIFHWADDR:
		case SIOCSIFMAP:
		case SIOCGIFMAP:
		case SIOCSIFSLAVE:
		case SIOCGIFSLAVE:
		case SIOCGIFINDEX:
		case SIOCGIFNAME:
		case SIOCGIFCOUNT:
		case SIOCSIFHWBROADCAST:
			return(dev_ioctl(cmd,(void *) arg));

		case SIOCGIFBR:
		case SIOCSIFBR:
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
#ifdef CONFIG_INET
#ifdef CONFIG_KMOD
			/* Demand-load the bridge module on first use. */
			if (br_ioctl_hook == NULL)
				request_module("bridge");
#endif
			if (br_ioctl_hook != NULL)
				return br_ioctl_hook(arg);
#endif
#endif				
			return -ENOPKG;

		case SIOCGIFDIVERT:
		case SIOCSIFDIVERT:
#ifdef CONFIG_NET_DIVERT
			return divert_ioctl(cmd, (struct divert_cf *) arg);
#else
			return -ENOPKG;
#endif /* CONFIG_NET_DIVERT */
			
#ifdef CONFIG_INET
		/* Address/route/ARP ioctls: handed to the INET layer. */
		case SIOCADDRT:
		case SIOCDELRT:
		case SIOCDARP:
		case SIOCGARP:
		case SIOCSARP:
		case SIOCGIFADDR:
		case SIOCSIFADDR:
		case SIOCGIFBRDADDR:
		case SIOCSIFBRDADDR:
		case SIOCGIFNETMASK:
		case SIOCSIFNETMASK:
		case SIOCGIFDSTADDR:
		case SIOCSIFDSTADDR:
		case SIOCSIFFLAGS:
		case SIOCADDDLCI:
		case SIOCDELDLCI:
			return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

		default:
			/* Driver-private and wireless ranges also go to
			 * the device layer. */
			if ((cmd >= SIOCDEVPRIVATE) &&
			    (cmd <= (SIOCDEVPRIVATE + 15)))
				return(dev_ioctl(cmd,(void *) arg));

#ifdef CONFIG_NET_RADIO
			if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
				return(dev_ioctl(cmd,(void *) arg));
#endif
			return -EOPNOTSUPP;
	}
	return 0;
}
1548
1549 #ifndef CONFIG_PACKET_MMAP
1550 #define packet_mmap sock_no_mmap
1551 #define packet_poll datagram_poll
1552 #else
1553
/*
 *	poll() for ring-capable sockets: in addition to the normal
 *	datagram readiness, report POLLIN when the ring slot just before
 *	'head' has a non-kernel status (frames are initialized to
 *	TP_STATUS_KERNEL in packet_set_ring, so non-zero here means a
 *	frame is ready for userspace).
 */
unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_opt *po = sk->protinfo.af_packet;
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->receive_queue.lock);
	if (po->iovec) {
		/* Slot preceding head, wrapping at iovmax. */
		unsigned last = po->head ? po->head-1 : po->iovmax;

		if (po->iovec[last]->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->receive_queue.lock);
	return mask;
}
1570
1571
1572 /* Dirty? Well, I still did not learn better way to account
1573  * for user mmaps.
1574  */
1575
1576 static void packet_mm_open(struct vm_area_struct *vma)
1577 {
1578         struct file *file = vma->vm_file;
1579         struct inode *inode = file->f_dentry->d_inode;
1580         struct socket * sock = &inode->u.socket_i;
1581         struct sock *sk = sock->sk;
1582         
1583         if (sk)
1584                 atomic_inc(&sk->protinfo.af_packet->mapped);
1585 }
1586
1587 static void packet_mm_close(struct vm_area_struct *vma)
1588 {
1589         struct file *file = vma->vm_file;
1590         struct inode *inode = file->f_dentry->d_inode;
1591         struct socket * sock = &inode->u.socket_i;
1592         struct sock *sk = sock->sk;
1593         
1594         if (sk)
1595                 atomic_dec(&sk->protinfo.af_packet->mapped);
1596 }
1597
/* VMA callbacks that keep po->mapped in step with the number of live
 * user mappings, so packet_set_ring() can refuse to free a busy ring. */
static struct vm_operations_struct packet_mmap_ops = {
	open:	packet_mm_open,
	close:	packet_mm_close,
};
1602
1603 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1604 {
1605         int i;
1606
1607         for (i=0; i<len; i++) {
1608                 if (pg_vec[i]) {
1609                         struct page *page, *pend;
1610
1611                         pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1612                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1613                                 ClearPageReserved(page);
1614                         free_pages(pg_vec[i], order);
1615                 }
1616         }
1617         kfree(pg_vec);
1618 }
1619
1620
/*
 *	Install (or, with a zero-sized request, tear down) the mmap()ed
 *	receive ring.  pg_vec holds the page blocks, io_vec points at
 *	every frame header inside them.  The old ring, if any, is
 *	swapped out under the locks and freed on the way out.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	unsigned long *pg_vec = NULL;
	struct tpacket_hdr **io_vec = NULL;
	struct packet_opt *po = sk->protinfo.af_packet;
	int order = 0;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;
		int frames_per_block;

		/* Sanity tests and some calculations */
		if ((int)req->tp_block_size <= 0)
			return -EINVAL;
		/* Blocks must be page-multiples, frames must hold at
		 * least a header and be properly aligned, and blocks x
		 * frames-per-block must equal the requested frame count. */
		if (req->tp_block_size&(PAGE_SIZE-1))
			return -EINVAL;
		if (req->tp_frame_size < TPACKET_HDRLEN)
			return -EINVAL;
		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
			return -EINVAL;
		frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (frames_per_block <= 0)
			return -EINVAL;
		if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
			return -EINVAL;
		/* OK! */

		/* Allocate page vector */
		while ((PAGE_SIZE<<order) < req->tp_block_size)
			order++;

		err = -ENOMEM;

		pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
		if (pg_vec == NULL)
			goto out;
		memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));

		for (i=0; i<req->tp_block_nr; i++) {
			struct page *page, *pend;
			pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
			if (!pg_vec[i])
				goto out_free_pgvec;
			memset((void *)(pg_vec[i]), 0, PAGE_SIZE << order);
			/* Mark the pages reserved so the VM leaves them
			 * alone while they are user-mapped. */
			pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
				SetPageReserved(page);
		}
		/* Page vector is allocated */

		/* Draw frames */
		io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
		if (io_vec == NULL)
			goto out_free_pgvec;
		memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));

		l = 0;
		for (i=0; i<req->tp_block_nr; i++) {
			unsigned long ptr = pg_vec[i];
			int k;

			/* Every frame starts kernel-owned. */
			for (k=0; k<frames_per_block; k++, l++) {
				io_vec[l] = (struct tpacket_hdr*)ptr;
				io_vec[l]->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		/* Tear-down request: no frames may be specified either. */
		if (req->tp_frame_nr)
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	if (po->running)
		dev_remove_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	/* Refuse to replace a ring that userspace still has mapped,
	 * unless we are closing the socket. */
	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		/* XC(): swap helper -- installs the new value and leaves
		 * the old one in the local, so the cleanup code below
		 * frees the ring we just replaced. */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		io_vec = XC(po->iovec, io_vec);
		po->iovmax = req->tp_frame_nr-1;
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* Select the ring or non-ring receive path. */
		po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	/* Re-attach the protocol hook if the socket was bound. */
	spin_lock(&po->bind_lock);
	if (po->running)
		dev_add_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (io_vec)
		kfree(io_vec);

out_free_pgvec:
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
1743
1744 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1745 {
1746         struct sock *sk = sock->sk;
1747         struct packet_opt *po = sk->protinfo.af_packet;
1748         unsigned long size;
1749         unsigned long start;
1750         int err = -EINVAL;
1751         int i;
1752
1753         if (vma->vm_pgoff)
1754                 return -EINVAL;
1755
1756         size = vma->vm_end - vma->vm_start;
1757
1758         lock_sock(sk);
1759         if (po->pg_vec == NULL)
1760                 goto out;
1761         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1762                 goto out;
1763
1764         atomic_inc(&po->mapped);
1765         start = vma->vm_start;
1766         err = -EAGAIN;
1767         for (i=0; i<po->pg_vec_len; i++) {
1768                 if (remap_page_range(start, __pa(po->pg_vec[i]),
1769                                      po->pg_vec_pages*PAGE_SIZE,
1770                                      vma->vm_page_prot))
1771                         goto out;
1772                 start += po->pg_vec_pages*PAGE_SIZE;
1773         }
1774         vma->vm_ops = &packet_mmap_ops;
1775         err = 0;
1776
1777 out:
1778         release_sock(sk);
1779         return err;
1780 }
1781 #endif
1782
1783
1784 #ifdef CONFIG_SOCK_PACKET
/* Ops vector for obsolete SOCK_PACKET sockets: no SOL_PACKET options,
 * no ring mmap, plain datagram_poll. */
struct proto_ops packet_ops_spkt = {
	family:		PF_PACKET,

	release:	packet_release,
	bind:		packet_bind_spkt,
	connect:	sock_no_connect,
	socketpair:	sock_no_socketpair,
	accept:		sock_no_accept,
	getname:	packet_getname_spkt,
	poll:		datagram_poll,
	ioctl:		packet_ioctl,
	listen:		sock_no_listen,
	shutdown:	sock_no_shutdown,
	setsockopt:	sock_no_setsockopt,
	getsockopt:	sock_no_getsockopt,
	sendmsg:	packet_sendmsg_spkt,
	recvmsg:	packet_recvmsg,
	mmap:		sock_no_mmap,
	sendpage:	sock_no_sendpage,
};
1805 #endif
1806
/* Ops vector for SOCK_DGRAM/SOCK_RAW packet sockets, including the
 * SOL_PACKET options and (when configured) the mmap()ed ring. */
struct proto_ops packet_ops = {
	family:		PF_PACKET,

	release:	packet_release,
	bind:		packet_bind,
	connect:	sock_no_connect,
	socketpair:	sock_no_socketpair,
	accept:		sock_no_accept,
	getname:	packet_getname,	
	poll:		packet_poll,
	ioctl:		packet_ioctl,
	listen:		sock_no_listen,
	shutdown:	sock_no_shutdown,
	setsockopt:	packet_setsockopt,
	getsockopt:	packet_getsockopt,
	sendmsg:	packet_sendmsg,
	recvmsg:	packet_recvmsg,
	mmap:		packet_mmap,
	sendpage:	sock_no_sendpage,
};
1827
1828 static struct net_proto_family packet_family_ops = {
1829         family:         PF_PACKET,
1830         create:         packet_create,
1831 };
1832
1833 static struct notifier_block packet_netdev_notifier = {
1834         notifier_call:  packet_notifier,
1835 };
1836
1837 #ifdef CONFIG_PROC_FS
/*
 * read_proc handler for /proc/net/packet.
 *
 * Formats one line per PF_PACKET socket on the global packet_sklist.
 * Uses the classic 2.4 proc windowing protocol: the caller asks for
 * `length` bytes starting at file `offset`; we regenerate the text from
 * the top each call, track where the requested window begins (`begin`),
 * and return via *start/len the slice that falls inside the window.
 *
 * buffer: page-sized output buffer supplied by procfs
 * start:  out - pointer into buffer where the requested data begins
 * offset: file offset the reader wants
 * length: maximum number of bytes to return
 * eof:    out - set to 1 only when the whole list was rendered
 * data:   unused registration cookie
 */
static int packet_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	struct sock *s;
	
	len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");

	/* Hold the list lock across the whole walk; sprintf only, no sleeping. */
	read_lock(&packet_sklist_lock);

	for (s = packet_sklist; s; s = s->next) {
		len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
			     s,
			     atomic_read(&s->refcnt),
			     s->type,
			     ntohs(s->num),
			     s->protinfo.af_packet->ifindex,
			     s->protinfo.af_packet->running,
			     atomic_read(&s->rmem_alloc),
			     sock_i_uid(s),
			     sock_i_ino(s)
			     );

		buffer[len++]='\n';
		
		pos=begin+len;
		/* Everything rendered so far is before the window: discard it
		 * and remember that the buffer now starts at offset `pos`. */
		if(pos<offset) {
			len=0;
			begin=pos;
		}
		/* Window filled; stop without setting *eof (more data remains). */
		if(pos>offset+length)
			goto done;
	}
	*eof = 1;

done:
	read_unlock(&packet_sklist_lock);
	/* Trim the slice to exactly the [offset, offset+length) window. */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if(len<0)
		len=0;
	return len;
}
1885 #endif
1886
1887 static void __exit packet_exit(void)
1888 {
1889         remove_proc_entry("net/packet", 0);
1890         unregister_netdevice_notifier(&packet_netdev_notifier);
1891         sock_unregister(PF_PACKET);
1892         return;
1893 }
1894
1895 static int __init packet_init(void)
1896 {
1897         sock_register(&packet_family_ops);
1898         register_netdevice_notifier(&packet_netdev_notifier);
1899 #ifdef CONFIG_PROC_FS
1900         create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1901 #endif
1902         return 0;
1903 }
1904
/* Module entry/exit hooks and license tag. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");