2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PACKET - implements raw packet sockets.
8 * Version: $Id: af_packet.c,v 1.1.1.1 2005/04/11 02:51:14 jack Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
15 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
45 #include <linux/config.h>
46 #include <linux/types.h>
47 #include <linux/sched.h>
49 #include <linux/fcntl.h>
50 #include <linux/socket.h>
52 #include <linux/inet.h>
53 #include <linux/netdevice.h>
54 #include <linux/if_packet.h>
55 #include <linux/wireless.h>
56 #include <linux/kmod.h>
58 #include <net/protocol.h>
59 #include <linux/skbuff.h>
61 #include <linux/errno.h>
62 #include <linux/timer.h>
63 #include <asm/system.h>
64 #include <asm/uaccess.h>
65 #include <asm/ioctls.h>
66 #include <linux/proc_fs.h>
67 #include <linux/poll.h>
68 #include <linux/module.h>
69 #include <linux/init.h>
70 #include <linux/if_bridge.h>
72 #ifdef CONFIG_NET_DIVERT
73 #include <linux/divert.h>
74 #endif /* CONFIG_NET_DIVERT */
77 #include <net/inet_common.h>
81 extern int dlci_ioctl(unsigned int, void*);
84 #define CONFIG_SOCK_PACKET 1
87 Proposed replacement for SIOC{ADD,DEL}MULTI and
88 IFF_PROMISC, IFF_ALLMULTI flags.
90 It is more expensive, but I believe,
91 it is really the correct solution: reentrant, safe and fault tolerant.
93 IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
94 reference count and global flag, so that real status is
95 (gflag|(count != 0)), so that we can use obsolete faulty interface
96 not harming clever users.
98 #define CONFIG_PACKET_MULTICAST 1
102 - if device has no dev->hard_header routine, it adds and removes ll header
103 inside itself. In this case ll header is invisible outside of device,
104 but higher levels still should reserve dev->hard_header_len.
105 Some devices are clever enough to reallocate the skb when the header
106 will not fit in the reserved space (tunnel); other ones are silly
108 - packet socket receives packets with pulled ll header,
109 so that SOCK_RAW should push it back.
114 Incoming, dev->hard_header!=NULL
118 Outgoing, dev->hard_header!=NULL
122 Incoming, dev->hard_header==NULL
123 mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
124 PPP does this, which is wrong, because it introduces asymmetry
125 between rx and tx paths.
128 Outgoing, dev->hard_header==NULL
129 mac.raw -> data. ll header is still not built!
133 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
139 dev->hard_header != NULL
143 dev->hard_header == NULL (ll header is added by device, we cannot control it)
147 We should set nh.raw on output to the correct position,
148 packet classifier depends on it.
151 /* List of all packet sockets. */
152 static struct sock * packet_sklist;
/* Guards packet_sklist: readers take read_lock, list mutation uses write_lock_bh. */
153 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
/* Count of live PF_PACKET sockets; incremented in packet_create,
 * decremented in packet_sock_destruct. */
155 atomic_t packet_socks_nr;
158 /* Private packet socket structures. */
160 #ifdef CONFIG_PACKET_MULTICAST
/* NOTE(review): the extraction dropped lines here; the two fields below
 * appear to belong to struct packet_mclist (per-socket multicast
 * membership entry) — confirm against the full source. */
163 struct packet_mclist *next;
168 unsigned char addr[8];
171 #ifdef CONFIG_PACKET_MMAP
/* Forward declaration: (re)size or tear down the PACKET_RX_RING. */
172 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
/* Forward declaration: drop all multicast memberships on socket close. */
175 static void packet_flush_mclist(struct sock *sk);
/* Fields below appear to belong to struct packet_opt, the per-socket
 * private state hung off sk->protinfo.af_packet. */
179 struct packet_type prot_hook;
180 spinlock_t bind_lock;
181 char running; /* prot_hook is attached*/
182 int ifindex; /* bound device */
183 struct tpacket_stats stats;
184 #ifdef CONFIG_PACKET_MULTICAST
185 struct packet_mclist *mclist;
187 #ifdef CONFIG_PACKET_MMAP
/* mmap ring state: vector of page blocks plus per-frame header pointers. */
189 unsigned long *pg_vec;
190 unsigned int pg_vec_order;
191 unsigned int pg_vec_pages;
192 unsigned int pg_vec_len;
194 struct tpacket_hdr **iovec;
195 unsigned int frame_size;
/* Socket destructor: sanity-check that no receive/send memory is still
 * charged to the socket, free the private area and drop the global
 * socket count.  NOTE(review): extraction dropped lines in this body. */
202 void packet_sock_destruct(struct sock *sk)
204 BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
205 BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
/* Reached only if the socket is released while still marked alive. */
208 printk("Attempt to release alive packet socket: %p\n", sk);
212 if (sk->protinfo.destruct_hook)
213 kfree(sk->protinfo.destruct_hook);
214 atomic_dec(&packet_socks_nr);
215 #ifdef PACKET_REFCNT_DEBUG
216 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
222 extern struct proto_ops packet_ops;
224 #ifdef CONFIG_SOCK_PACKET
225 extern struct proto_ops packet_ops_spkt;
/* Receive handler for legacy SOCK_PACKET sockets: restore the link-level
 * header, fill a sockaddr_pkt in skb->cb, and queue the skb on the
 * socket's receive queue.  Runs in softirq context via the protocol hook. */
227 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
230 struct sockaddr_pkt *spkt;
233 * When we registered the protocol we saved the socket in the data
234 * field for just this event.
237 sk = (struct sock *) pt->data;
240 * Yank back the headers [hope the device set this
241 * right or kerboom...]
243 * Incoming packets have ll header pulled,
246 * For outgoing ones skb->data == skb->mac.raw
247 * so that this procedure is noop.
/* Ignore looped-back copies of our own transmissions. */
250 if (skb->pkt_type == PACKET_LOOPBACK)
/* We may mangle the skb, so get a private copy if it is shared. */
253 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
256 spkt = (struct sockaddr_pkt*)skb->cb;
258 skb_push(skb, skb->data-skb->mac.raw);
261 * The SOCK_PACKET socket receives _all_ frames.
264 spkt->spkt_family = dev->type;
265 strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
266 spkt->spkt_protocol = skb->protocol;
269 * Charge the memory to the socket. This is done specifically
270 * to prevent sockets using all the memory up.
273 if (sock_queue_rcv_skb(sk,skb) == 0)
284 * Output a raw packet to a device layer. This bypasses all the other
285 * protocol layers and you must therefore supply it with a complete frame
/* sendmsg for SOCK_PACKET: look up the device by name from the supplied
 * sockaddr_pkt, size-check against MTU, build an skb and transmit it.
 * NOTE(review): extraction dropped lines in this body (error paths,
 * dev_queue_xmit call) — consult the full source before editing. */
288 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
289 struct scm_cookie *scm)
291 struct sock *sk = sock->sk;
292 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
294 struct net_device *dev;
295 unsigned short proto=0;
299 * Get and verify the address.
304 if (msg->msg_namelen < sizeof(struct sockaddr))
306 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
307 proto=saddr->spkt_protocol;
310 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
313 * Find the device first to size check it
/* Force NUL termination of the user-supplied device name. */
316 saddr->spkt_device[13] = 0;
317 dev = dev_get_by_name(saddr->spkt_device);
323 * You may not queue a frame bigger than the mtu. This is the lowest level
324 * raw protocol and you must do your own fragmentation at this level.
328 if(len>dev->mtu+dev->hard_header_len)
/* +15 leaves room to align the reserved headroom to 16 bytes below. */
332 skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);
335 * If the write buffer is full, then tough. At this level the user gets to
336 * deal with the problem - do your own algorithmic backoffs. That's far
347 /* FIXME: Save some space for broken drivers that write a
348 * hard header at transmission time by themselves. PPP is the
349 * notable one here. This should really be fixed at the driver level.
351 skb_reserve(skb,(dev->hard_header_len+15)&~15);
352 skb->nh.raw = skb->data;
354 /* Try to align data part correctly */
355 if (dev->hard_header) {
356 skb->data -= dev->hard_header_len;
357 skb->tail -= dev->hard_header_len;
358 if (len < dev->hard_header_len)
359 skb->nh.raw = skb->data;
362 /* Returns -EFAULT on error */
363 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
364 skb->protocol = proto;
366 skb->priority = sk->priority;
371 if (!(dev->flags & IFF_UP))
392 This function makes lazy skb cloning in hope that most of packets
393 are discarded by BPF.
395 Note tricky part: we DO mangle shared skb! skb->data, skb->len
396 and skb->cb are mangled. It works because (and until) packets
397 falling here are owned by current CPU. Output packets are cloned
398 by dev_queue_xmit_nit(), input packets are processed by net_bh
399 sequentially, so that if we return skb to original state on exit,
400 we will not harm anyone.
/* Receive handler for AF_PACKET (non-mmap) sockets.  Implements the lazy
 * skb-cloning scheme described in the comment above: the shared skb is
 * mangled in place and restored (skb_head/skb_len) before returning.
 * NOTE(review): extraction dropped lines in this body. */
403 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
406 struct sockaddr_ll *sll;
407 struct packet_opt *po;
/* Remember original data pointer/length so the shared skb can be restored. */
408 u8 * skb_head = skb->data;
409 int skb_len = skb->len;
414 if (skb->pkt_type == PACKET_LOOPBACK)
417 sk = (struct sock *) pt->data;
418 po = sk->protinfo.af_packet;
422 if (dev->hard_header) {
423 /* The device has an explicit notion of ll header,
424 exported to higher levels.
426 Otherwise, the device hides details of its frame
427 structure, so that corresponding packet head
428 never delivered to user.
/* SOCK_RAW gets the ll header pushed back; SOCK_DGRAM does not. */
430 if (sk->type != SOCK_DGRAM)
431 skb_push(skb, skb->data - skb->mac.raw)
432 else if (skb->pkt_type == PACKET_OUTGOING) {
433 /* Special case: outgoing packets have ll header at head */
434 skb_pull(skb, skb->nh.raw - skb->data);
/* Run the attached BPF filter (if any) to decide the snap length. */
442 unsigned res = snaplen;
443 struct sk_filter *filter;
446 if ((filter = sk->filter) != NULL)
447 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
455 #endif /* CONFIG_FILTER */
/* Drop (and count) if the receive buffer is already full. */
457 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
460 if (skb_shared(skb)) {
461 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
465 if (skb_head != skb->data) {
466 skb->data = skb_head;
/* Build the sockaddr_ll the receiver will see, inside skb->cb. */
473 sll = (struct sockaddr_ll*)skb->cb;
474 sll->sll_family = AF_PACKET;
475 sll->sll_hatype = dev->type;
476 sll->sll_protocol = skb->protocol;
477 sll->sll_pkttype = skb->pkt_type;
478 sll->sll_ifindex = dev->ifindex;
481 if (dev->hard_header_parse)
482 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
485 if (pskb_trim(skb, snaplen))
489 skb_set_owner_r(skb, sk);
/* Queue under the receive-queue lock; tp_packets is protected by it too. */
491 spin_lock(&sk->receive_queue.lock);
492 po->stats.tp_packets++;
493 __skb_queue_tail(&sk->receive_queue, skb);
494 spin_unlock(&sk->receive_queue.lock);
495 sk->data_ready(sk,skb->len);
/* Drop path: account the loss under the same lock. */
499 spin_lock(&sk->receive_queue.lock);
500 po->stats.tp_drops++;
501 spin_unlock(&sk->receive_queue.lock);
/* Restore the shared skb to its original state before returning. */
506 if (skb_head != skb->data && skb_shared(skb)) {
507 skb->data = skb_head;
515 #ifdef CONFIG_PACKET_MMAP
/* Receive handler when a PACKET_RX_RING is attached: copy the (possibly
 * filtered/truncated) frame directly into the next mmap ring slot instead
 * of queueing the skb.  NOTE(review): extraction dropped lines here. */
516 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
519 struct packet_opt *po;
520 struct sockaddr_ll *sll;
521 struct tpacket_hdr *h;
522 u8 * skb_head = skb->data;
523 int skb_len = skb->len;
/* Assume LOSING until we know tp_drops is zero; USER marks the slot ready. */
525 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
526 unsigned short macoff, netoff;
527 struct sk_buff *copy_skb = NULL;
529 if (skb->pkt_type == PACKET_LOOPBACK)
532 sk = (struct sock *) pt->data;
533 po = sk->protinfo.af_packet;
535 if (dev->hard_header) {
536 if (sk->type != SOCK_DGRAM)
537 skb_push(skb, skb->data - skb->mac.raw)
538 else if (skb->pkt_type == PACKET_OUTGOING) {
539 /* Special case: outgoing packets have ll header at head */
540 skb_pull(skb, skb->nh.raw - skb->data);
/* Hardware checksum not yet computed for outgoing frames. */
541 if (skb->ip_summed == CHECKSUM_HW)
542 status |= TP_STATUS_CSUMNOTREADY;
/* BPF filter decides snap length, as in packet_rcv. */
550 unsigned res = snaplen;
551 struct sk_filter *filter;
554 if ((filter = sk->filter) != NULL)
555 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
/* Compute where MAC and network headers land inside the ring frame. */
565 if (sk->type == SOCK_DGRAM) {
566 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
568 unsigned maclen = skb->nh.raw - skb->data;
569 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
570 macoff = netoff - maclen;
/* Frame too large for a ring slot: optionally keep a queued copy. */
573 if (macoff + snaplen > po->frame_size) {
574 if (po->copy_thresh &&
575 atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
576 if (skb_shared(skb)) {
577 copy_skb = skb_clone(skb, GFP_ATOMIC);
579 copy_skb = skb_get(skb);
580 skb_head = skb->data;
583 skb_set_owner_r(copy_skb, sk);
585 snaplen = po->frame_size - macoff;
586 if ((int)snaplen < 0)
/* Never copy beyond the linear part of the skb. */
589 if (snaplen > skb->len-skb->data_len)
590 snaplen = skb->len-skb->data_len;
/* receive_queue.lock also serializes ring head movement and stats. */
592 spin_lock(&sk->receive_queue.lock);
593 h = po->iovec[po->head];
/* Advance ring head, wrapping at iovmax. */
597 po->head = po->head != po->iovmax ? po->head+1 : 0;
598 po->stats.tp_packets++;
600 status |= TP_STATUS_COPY;
601 __skb_queue_tail(&sk->receive_queue, copy_skb);
603 if (!po->stats.tp_drops)
604 status &= ~TP_STATUS_LOSING;
605 spin_unlock(&sk->receive_queue.lock);
607 memcpy((u8*)h + macoff, skb->data, snaplen);
609 h->tp_len = skb->len;
610 h->tp_snaplen = snaplen;
613 h->tp_sec = skb->stamp.tv_sec;
614 h->tp_usec = skb->stamp.tv_usec;
616 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
618 if (dev->hard_header_parse)
619 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
620 sll->sll_family = AF_PACKET;
621 sll->sll_hatype = dev->type;
622 sll->sll_protocol = skb->protocol;
623 sll->sll_pkttype = skb->pkt_type;
624 sll->sll_ifindex = dev->ifindex;
/* Publish the slot to userspace last, after all fields are written. */
626 h->tp_status = status;
/* Flush dcache over the written slot so a userspace mapping sees it. */
630 struct page *p_start, *p_end;
631 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
633 p_start = virt_to_page(h);
634 p_end = virt_to_page(h_end);
635 while (p_start <= p_end) {
636 flush_dcache_page(p_start);
641 sk->data_ready(sk, 0);
/* Restore shared skb before returning, as in packet_rcv. */
644 if (skb_head != skb->data && skb_shared(skb)) {
645 skb->data = skb_head;
/* Drop path. */
653 po->stats.tp_drops++;
654 spin_unlock(&sk->receive_queue.lock);
656 sk->data_ready(sk, 0);
/* sendmsg for SOCK_RAW/SOCK_DGRAM packet sockets: resolve the target
 * device (bound ifindex or sockaddr_ll), build the link-level header via
 * dev->hard_header for SOCK_DGRAM, copy the payload and transmit.
 * NOTE(review): extraction dropped lines in this body. */
665 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
666 struct scm_cookie *scm)
668 struct sock *sk = sock->sk;
669 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
671 struct net_device *dev;
672 unsigned short proto;
674 int ifindex, err, reserve = 0;
677 * Get and verify the address.
/* No address supplied: use the ifindex the socket is bound to. */
681 ifindex = sk->protinfo.af_packet->ifindex;
686 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
688 ifindex = saddr->sll_ifindex;
689 proto = saddr->sll_protocol;
690 addr = saddr->sll_addr;
694 dev = dev_get_by_index(ifindex);
/* SOCK_RAW supplies its own ll header, so allow MTU + header length. */
698 if (sock->type == SOCK_RAW)
699 reserve = dev->hard_header_len;
702 if (len > dev->mtu+reserve)
705 skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15,
706 msg->msg_flags & MSG_DONTWAIT, &err);
710 skb_reserve(skb, (dev->hard_header_len+15)&~15);
711 skb->nh.raw = skb->data;
713 if (dev->hard_header) {
716 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
717 if (sock->type != SOCK_DGRAM) {
718 skb->tail = skb->data;
724 /* Returns -EFAULT on error */
725 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
729 skb->protocol = proto;
731 skb->priority = sk->priority;
734 if (!(dev->flags & IFF_UP))
741 err = dev_queue_xmit(skb);
742 if (err > 0 && (err = net_xmit_errno(err)) != 0)
759 * Close a PACKET socket. This is fairly simple. We immediately go
760 * to 'closed' state and remove our protocol entry in the device list.
/* release(): unlink from packet_sklist, detach the protocol hook, flush
 * multicast memberships and the mmap ring, then purge queued skbs.
 * NOTE(review): extraction dropped lines in this body. */
763 static int packet_release(struct socket *sock)
765 struct sock *sk = sock->sk;
/* Remove this socket from the global list under the write lock. */
771 write_lock_bh(&packet_sklist_lock);
772 for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
779 write_unlock_bh(&packet_sklist_lock);
782 * Unhook packet receive handler.
785 if (sk->protinfo.af_packet->running) {
787 * Remove the protocol hook
789 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
790 sk->protinfo.af_packet->running = 0;
794 #ifdef CONFIG_PACKET_MULTICAST
795 packet_flush_mclist(sk);
798 #ifdef CONFIG_PACKET_MMAP
/* A zeroed tpacket_req tears the ring down; closing=1 forces it. */
799 if (sk->protinfo.af_packet->pg_vec) {
800 struct tpacket_req req;
801 memset(&req, 0, sizeof(req));
802 packet_set_ring(sk, &req, 1);
807 * Now the socket is dead. No more input will appear.
815 skb_queue_purge(&sk->receive_queue);
822 * Attach a packet hook.
/* Rebind the socket's protocol hook to (dev, protocol).  The hook is only
 * registered while the device is up; bind_lock serializes attach/detach
 * against the netdev notifier.  NOTE(review): lines missing in this body. */
825 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
828 * Detach an existing hook if present.
833 spin_lock(&sk->protinfo.af_packet->bind_lock);
834 if (sk->protinfo.af_packet->running) {
835 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
837 sk->protinfo.af_packet->running = 0;
841 sk->protinfo.af_packet->prot_hook.type = protocol;
842 sk->protinfo.af_packet->prot_hook.dev = dev;
/* ifindex 0 means "any device". */
844 sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;
850 if (dev->flags&IFF_UP) {
851 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
853 sk->protinfo.af_packet->running = 1;
/* Device is down: report the error to the socket owner. */
857 sk->error_report(sk);
860 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
862 sk->protinfo.af_packet->running = 1;
866 spin_unlock(&sk->protinfo.af_packet->bind_lock);
872 * Bind a packet socket to a device
875 #ifdef CONFIG_SOCK_PACKET
/* SOCK_PACKET bind: the address is a plain sockaddr carrying a device
 * name in sa_data; look the device up and delegate to packet_do_bind. */
877 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
879 struct sock *sk=sock->sk;
881 struct net_device *dev;
888 if(addr_len!=sizeof(struct sockaddr))
/* NOTE(review): strncpy may leave name unterminated for 14-char inputs;
 * the full source presumably NUL-terminates — confirm before changing. */
890 strncpy(name,uaddr->sa_data,14);
893 dev = dev_get_by_name(name);
895 err = packet_do_bind(sk, dev, sk->num);
/* AF_PACKET bind: validate the sockaddr_ll, resolve the interface by
 * ifindex (0 = any), and attach the hook for the requested protocol
 * (falling back to the protocol given at socket creation, sk->num). */
902 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
904 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
905 struct sock *sk=sock->sk;
906 struct net_device *dev = NULL;
914 if (addr_len < sizeof(struct sockaddr_ll))
916 if (sll->sll_family != AF_PACKET)
919 if (sll->sll_ifindex) {
921 dev = dev_get_by_index(sll->sll_ifindex);
925 err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
935 * Create a packet of type SOCK_PACKET.
/* socket(2) backend for PF_PACKET: requires CAP_NET_RAW, allocates the
 * sock and its packet_opt, installs the receive hook for the requested
 * protocol and links the socket into packet_sklist.
 * NOTE(review): extraction dropped lines in this body. */
938 static int packet_create(struct socket *sock, int protocol)
943 if (!capable(CAP_NET_RAW))
945 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
946 #ifdef CONFIG_SOCK_PACKET
947 && sock->type != SOCK_PACKET
950 return -ESOCKTNOSUPPORT;
952 sock->state = SS_UNCONNECTED;
956 sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
960 sock->ops = &packet_ops;
961 #ifdef CONFIG_SOCK_PACKET
962 if (sock->type == SOCK_PACKET)
963 sock->ops = &packet_ops_spkt;
965 sock_init_data(sock,sk);
967 sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
968 if (sk->protinfo.af_packet == NULL)
970 memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
971 sk->family = PF_PACKET;
974 sk->destruct = packet_sock_destruct;
975 atomic_inc(&packet_socks_nr);
978 * Attach a protocol block
981 spin_lock_init(&sk->protinfo.af_packet->bind_lock);
982 sk->protinfo.af_packet->prot_hook.func = packet_rcv;
983 #ifdef CONFIG_SOCK_PACKET
984 if (sock->type == SOCK_PACKET)
985 sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
/* The hook's data pointer carries the socket back to the rcv handlers. */
987 sk->protinfo.af_packet->prot_hook.data = (void *)sk;
990 sk->protinfo.af_packet->prot_hook.type = protocol;
991 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
993 sk->protinfo.af_packet->running = 1;
996 write_lock_bh(&packet_sklist_lock);
997 sk->next = packet_sklist;
1000 write_unlock_bh(&packet_sklist_lock);
1011 * Pull a packet from our receive queue and hand it to the user.
1012 * If necessary we block.
/* Shared recvmsg for both SOCK_PACKET and AF_PACKET sockets; the queued
 * skb's cb already holds the prepared sockaddr (pkt or ll). */
1015 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
1016 int flags, struct scm_cookie *scm)
1018 struct sock *sk = sock->sk;
1019 struct sk_buff *skb;
1023 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
1027 /* What error should we return now? EUNATTACH? */
1028 if (sk->protinfo.af_packet->ifindex < 0)
1033 * If the address length field is there to be filled in, we fill
1037 if (sock->type == SOCK_PACKET)
1038 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1040 msg->msg_namelen = sizeof(struct sockaddr_ll);
1043 * Call the generic datagram receiver. This handles all sorts
1044 * of horrible races and re-entrancy so we can forget about it
1045 * in the protocol layers.
1047 * Now it will return ENETDOWN, if device have just gone down,
1048 * but then it will block.
1051 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1054 * An error occurred so return it. Because skb_recv_datagram()
1055 * handles the blocking we don't see and worry about blocking
1063 * You lose any data beyond the buffer you gave. If it worries a
1064 * user program they can ask the device for its MTU anyway.
1071 msg->msg_flags|=MSG_TRUNC;
1074 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1078 sock_recv_timestamp(msg, sk, skb);
/* The rcv handlers stashed the sender address in skb->cb. */
1081 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1084 * Free or return the buffer as appropriate. Again this
1085 * hides all the races and re-entrancy issues from us.
/* With MSG_TRUNC, report the full packet length, not just what was copied. */
1087 err = (flags&MSG_TRUNC) ? skb->len : copied;
1090 skb_free_datagram(sk, skb);
1095 #ifdef CONFIG_SOCK_PACKET
/* getname for SOCK_PACKET: report AF_PACKET plus the bound device name
 * (or zeroed sa_data if unbound / device gone). */
1096 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1097 int *uaddr_len, int peer)
1099 struct net_device *dev;
1100 struct sock *sk = sock->sk;
1105 uaddr->sa_family = AF_PACKET;
1106 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1108 strncpy(uaddr->sa_data, dev->name, 15);
1111 memset(uaddr->sa_data, 0, 14);
1112 *uaddr_len = sizeof(*uaddr);
/* getname for AF_PACKET: fill a sockaddr_ll with the bound ifindex,
 * protocol, and — if the device still exists — its type and hw address. */
1118 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1119 int *uaddr_len, int peer)
1121 struct net_device *dev;
1122 struct sock *sk = sock->sk;
1123 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1128 sll->sll_family = AF_PACKET;
1129 sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
1130 sll->sll_protocol = sk->num;
1131 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1133 sll->sll_hatype = dev->type;
1134 sll->sll_halen = dev->addr_len;
1135 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1138 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1141 *uaddr_len = sizeof(*sll);
1146 #ifdef CONFIG_PACKET_MULTICAST
/* Apply one membership entry to a device: 'what' is +1 to add / -1 to
 * remove (also used directly as the promiscuity/allmulti delta). */
1147 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1150 case PACKET_MR_MULTICAST:
1152 dev_mc_add(dev, i->addr, i->alen, 0);
1154 dev_mc_delete(dev, i->addr, i->alen, 0);
1156 case PACKET_MR_PROMISC:
1157 dev_set_promiscuity(dev, what);
1159 case PACKET_MR_ALLMULTI:
1160 dev_set_allmulti(dev, what);
/* Walk a socket's membership list and (re)apply every entry that refers
 * to this device — used when the device goes up/down or unregisters. */
1166 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1168 for ( ; i; i=i->next) {
1169 if (i->ifindex == dev->ifindex)
1170 packet_dev_mc(dev, i, what);
/* PACKET_ADD_MEMBERSHIP: validate the request, dedupe against existing
 * entries (bumping a refcount in the full source), otherwise prepend a
 * new entry and program the device.
 * NOTE(review): extraction dropped lines in this body. */
1174 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1176 struct packet_mclist *ml, *i;
1177 struct net_device *dev;
1183 dev = __dev_get_by_index(mreq->mr_ifindex);
1188 if (mreq->mr_alen > dev->addr_len)
1192 i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
/* Check for an existing identical membership first. */
1197 for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
1198 if (ml->ifindex == mreq->mr_ifindex &&
1199 ml->type == mreq->mr_type &&
1200 ml->alen == mreq->mr_alen &&
1201 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1203 /* Free the new element ... */
1209 i->type = mreq->mr_type;
1210 i->ifindex = mreq->mr_ifindex;
1211 i->alen = mreq->mr_alen;
1212 memcpy(i->addr, mreq->mr_address, i->alen);
1214 i->next = sk->protinfo.af_packet->mclist;
1215 sk->protinfo.af_packet->mclist = i;
1216 packet_dev_mc(dev, i, +1);
/* PACKET_DROP_MEMBERSHIP: find the matching entry; when its refcount hits
 * zero, deprogram the device and (in the full source) unlink/free it.
 * Returns -EADDRNOTAVAIL when no matching membership exists. */
1223 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1225 struct packet_mclist *ml, **mlp;
1229 for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
1230 if (ml->ifindex == mreq->mr_ifindex &&
1231 ml->type == mreq->mr_type &&
1232 ml->alen == mreq->mr_alen &&
1233 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1234 if (--ml->count == 0) {
1235 struct net_device *dev;
1237 dev = dev_get_by_index(ml->ifindex);
1239 packet_dev_mc(dev, ml, -1);
1249 return -EADDRNOTAVAIL;
/* Drop every multicast/promisc membership this socket holds; called from
 * packet_release.  Devices that have disappeared are simply skipped. */
1252 static void packet_flush_mclist(struct sock *sk)
1254 struct packet_mclist *ml;
1256 if (sk->protinfo.af_packet->mclist == NULL)
1260 while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1261 struct net_device *dev;
1262 sk->protinfo.af_packet->mclist = ml->next;
1263 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1264 packet_dev_mc(dev, ml, -1);
/* setsockopt(SOL_PACKET): multicast membership, RX ring setup, and the
 * copy threshold for oversized ring frames.
 * NOTE(review): extraction dropped lines (switch/case scaffolding). */
1274 packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
1276 struct sock *sk = sock->sk;
1279 if (level != SOL_PACKET)
1280 return -ENOPROTOOPT;
1283 #ifdef CONFIG_PACKET_MULTICAST
1284 case PACKET_ADD_MEMBERSHIP:
1285 case PACKET_DROP_MEMBERSHIP:
1287 struct packet_mreq mreq;
1288 if (optlen<sizeof(mreq))
1290 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1292 if (optname == PACKET_ADD_MEMBERSHIP)
1293 ret = packet_mc_add(sk, &mreq);
1295 ret = packet_mc_drop(sk, &mreq);
1299 #ifdef CONFIG_PACKET_MMAP
1300 case PACKET_RX_RING:
1302 struct tpacket_req req;
1304 if (optlen<sizeof(req))
1306 if (copy_from_user(&req,optval,sizeof(req)))
1308 return packet_set_ring(sk, &req, 0);
1310 case PACKET_COPY_THRESH:
1314 if (optlen!=sizeof(val))
1316 if (copy_from_user(&val,optval,sizeof(val)))
1319 sk->protinfo.af_packet->copy_thresh = val;
1324 return -ENOPROTOOPT;
/* getsockopt(SOL_PACKET): currently only PACKET_STATISTICS is visible
 * here — returns and resets the packet/drop counters atomically under
 * the receive-queue lock. */
1328 int packet_getsockopt(struct socket *sock, int level, int optname,
1329 char *optval, int *optlen)
1332 struct sock *sk = sock->sk;
1334 if (level != SOL_PACKET)
1335 return -ENOPROTOOPT;
1337 if (get_user(len,optlen))
1344 case PACKET_STATISTICS:
1346 struct tpacket_stats st;
1348 if (len > sizeof(struct tpacket_stats))
1349 len = sizeof(struct tpacket_stats);
/* Snapshot and zero the counters in one critical section. */
1350 spin_lock_bh(&sk->receive_queue.lock);
1351 st = sk->protinfo.af_packet->stats;
1352 memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1353 spin_unlock_bh(&sk->receive_queue.lock);
/* tp_packets reported to userspace includes drops. */
1354 st.tp_packets += st.tp_drops;
1356 if (copy_to_user(optval, &st, len))
1361 return -ENOPROTOOPT;
1364 if (put_user(len, optlen))
/* Netdevice notifier: walks every packet socket and detaches/reattaches
 * its hook and multicast state as devices go down, unregister, or come
 * up.  NOTE(review): extraction dropped lines (case labels, gotos). */
1370 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1373 struct packet_opt *po;
1374 struct net_device *dev = (struct net_device*)data;
1376 read_lock(&packet_sklist_lock);
1377 for (sk = packet_sklist; sk; sk = sk->next) {
1378 po = sk->protinfo.af_packet;
1382 case NETDEV_UNREGISTER:
1383 if (dev->ifindex == po->ifindex) {
1384 spin_lock(&po->bind_lock);
1386 dev_remove_pack(&po->prot_hook);
/* Tell the owner its bound device went away. */
1391 sk->error_report(sk);
1393 if (msg == NETDEV_UNREGISTER) {
1395 po->prot_hook.dev = NULL;
1397 spin_unlock(&po->bind_lock);
1399 #ifdef CONFIG_PACKET_MULTICAST
1401 packet_dev_mclist(dev, po->mclist, -1);
/* Device (re)appeared: reattach the hook if the socket expects one. */
1405 spin_lock(&po->bind_lock);
1406 if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
1407 dev_add_pack(&po->prot_hook);
1411 spin_unlock(&po->bind_lock);
1412 #ifdef CONFIG_PACKET_MULTICAST
1414 packet_dev_mclist(dev, po->mclist, +1);
1419 read_unlock(&packet_sklist_lock);
/* ioctl dispatcher for packet sockets: handles queue-size queries,
 * ownership/timestamp requests locally, and forwards interface, bridge,
 * divert and wireless ioctls to the appropriate subsystem.
 * NOTE(review): extraction dropped lines (case labels, locals). */
1424 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1427 struct sock *sk = sock->sk;
/* TIOCOUTQ: bytes currently charged to the send buffer. */
1433 int amount = atomic_read(&sk->wmem_alloc);
1434 return put_user(amount, (int *)arg);
/* TIOCINQ: size of the next queued packet, if any. */
1438 struct sk_buff *skb;
1441 spin_lock_bh(&sk->receive_queue.lock);
1442 skb = skb_peek(&sk->receive_queue);
1445 spin_unlock_bh(&sk->receive_queue.lock);
1446 return put_user(amount, (int *)arg);
/* Socket ownership: only the owner (or CAP_NET_ADMIN) may change it. */
1451 if (get_user(pid, (int *) arg))
1453 if (current->pid != pid && current->pgrp != -pid &&
1454 !capable(CAP_NET_ADMIN))
1461 return put_user(sk->proc, (int *)arg);
/* SIOCGSTAMP: last packet timestamp; zero means none received yet. */
1463 if(sk->stamp.tv_sec==0)
1465 if (copy_to_user((void *)arg, &sk->stamp,
1466 sizeof(struct timeval)))
/* Interface configuration ioctls go to the generic device layer. */
1490 case SIOCSIFHWBROADCAST:
1491 return(dev_ioctl(cmd,(void *) arg));
1495 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* Bridge ioctls: demand-load the bridge module if its hook is absent. */
1498 if (br_ioctl_hook == NULL)
1499 request_module("bridge");
1501 if (br_ioctl_hook != NULL)
1502 return br_ioctl_hook(arg);
1509 #ifdef CONFIG_NET_DIVERT
1510 return divert_ioctl(cmd, (struct divert_cf *) arg);
1513 #endif /* CONFIG_NET_DIVERT */
/* Address-family ioctls are delegated to the inet datagram ops. */
1523 case SIOCGIFBRDADDR:
1524 case SIOCSIFBRDADDR:
1525 case SIOCGIFNETMASK:
1526 case SIOCSIFNETMASK:
1527 case SIOCGIFDSTADDR:
1528 case SIOCSIFDSTADDR:
1532 return inet_dgram_ops.ioctl(sock, cmd, arg);
/* Driver-private and wireless ranges also go to dev_ioctl. */
1536 if ((cmd >= SIOCDEVPRIVATE) &&
1537 (cmd <= (SIOCDEVPRIVATE + 15)))
1538 return(dev_ioctl(cmd,(void *) arg));
1540 #ifdef CONFIG_NET_RADIO
1541 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
1542 return(dev_ioctl(cmd,(void *) arg));
1549 #ifndef CONFIG_PACKET_MMAP
/* Without mmap support the generic implementations suffice. */
1550 #define packet_mmap sock_no_mmap
1551 #define packet_poll datagram_poll
/* With an RX ring attached, also report readable when the most recently
 * filled ring slot has a nonzero tp_status (owned by user). */
1554 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1556 struct sock *sk = sock->sk;
1557 struct packet_opt *po = sk->protinfo.af_packet;
1558 unsigned int mask = datagram_poll(file, sock, wait);
1560 spin_lock_bh(&sk->receive_queue.lock);
/* 'last' is the slot written most recently (head points at the next one). */
1562 unsigned last = po->head ? po->head-1 : po->iovmax;
1564 if (po->iovec[last]->tp_status)
1565 mask |= POLLIN | POLLRDNORM;
1567 spin_unlock_bh(&sk->receive_queue.lock);
1572 /* Dirty? Well, I still did not learn better way to account
/* VMA open hook: bump the socket's mapping count so packet_set_ring
 * refuses to free the ring while userspace still maps it. */
1576 static void packet_mm_open(struct vm_area_struct *vma)
1578 struct file *file = vma->vm_file;
1579 struct inode *inode = file->f_dentry->d_inode;
1580 struct socket * sock = &inode->u.socket_i;
1581 struct sock *sk = sock->sk;
1584 atomic_inc(&sk->protinfo.af_packet->mapped);
/* VMA close hook: drop the mapping count taken in packet_mm_open. */
1587 static void packet_mm_close(struct vm_area_struct *vma)
1589 struct file *file = vma->vm_file;
1590 struct inode *inode = file->f_dentry->d_inode;
1591 struct socket * sock = &inode->u.socket_i;
1592 struct sock *sk = sock->sk;
1595 atomic_dec(&sk->protinfo.af_packet->mapped);
/* VMA operations for the mmap'ed RX ring: track mapping refcounts only. */
1598 static struct vm_operations_struct packet_mmap_ops = {
1599 open: packet_mm_open,
1600 close: packet_mm_close,
/* Free a ring page vector: clear PG_reserved on each page (set in
 * packet_set_ring so the pages survive remapping) and release them. */
1603 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1607 for (i=0; i<len; i++) {
1609 struct page *page, *pend;
1611 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1612 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1613 ClearPageReserved(page);
1614 free_pages(pg_vec[i], order);
/* Create, replace or (when req->tp_block_nr == 0) destroy the PACKET_RX_RING.
 * Allocates page blocks and an iovec of frame headers, swaps them into
 * packet_opt under the receive-queue lock, and switches the receive hook
 * between packet_rcv and tpacket_rcv accordingly.  'closing' forces the
 * swap even while userspace mappings exist.
 * NOTE(review): extraction dropped lines (error checks, labels). */
1621 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1623 unsigned long *pg_vec = NULL;
1624 struct tpacket_hdr **io_vec = NULL;
1625 struct packet_opt *po = sk->protinfo.af_packet;
1629 if (req->tp_block_nr) {
1631 int frames_per_block;
1633 /* Sanity tests and some calculations */
1634 if ((int)req->tp_block_size <= 0)
1636 if (req->tp_block_size&(PAGE_SIZE-1))
1638 if (req->tp_frame_size < TPACKET_HDRLEN)
1640 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1642 frames_per_block = req->tp_block_size/req->tp_frame_size;
1644 if (frames_per_block <= 0)
1645 if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1649 /* Allocate page vector */
/* Find the smallest page order that covers one block. */
1650 while ((PAGE_SIZE<<order) < req->tp_block_size)
1655 pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1658 memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1660 for (i=0; i<req->tp_block_nr; i++) {
1661 struct page *page, *pend;
1662 pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1664 goto out_free_pgvec;
1665 memset((void *)(pg_vec[i]), 0, PAGE_SIZE << order);
1666 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
/* Mark pages reserved so they can be safely remapped to userspace. */
1667 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1668 SetPageReserved(page);
1670 /* Page vector is allocated */
1673 io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
1675 goto out_free_pgvec;
1676 memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
/* Carve each block into frames; every slot starts owned by the kernel. */
1679 for (i=0; i<req->tp_block_nr; i++) {
1680 unsigned long ptr = pg_vec[i];
1683 for (k=0; k<frames_per_block; k++, l++) {
1684 io_vec[l] = (struct tpacket_hdr*)ptr;
1685 io_vec[l]->tp_status = TP_STATUS_KERNEL;
1686 ptr += req->tp_frame_size;
1691 if (req->tp_frame_nr)
1697 /* Detach socket from network */
1698 spin_lock(&po->bind_lock);
1700 dev_remove_pack(&po->prot_hook);
1701 spin_unlock(&po->bind_lock);
1704 if (closing || atomic_read(&po->mapped) == 0) {
/* XC swaps two lvalues and yields the old value of the first. */
1706 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1708 spin_lock_bh(&sk->receive_queue.lock);
1709 pg_vec = XC(po->pg_vec, pg_vec);
1710 io_vec = XC(po->iovec, io_vec);
1711 po->iovmax = req->tp_frame_nr-1;
1713 po->frame_size = req->tp_frame_size;
1714 spin_unlock_bh(&sk->receive_queue.lock);
1716 order = XC(po->pg_vec_order, order);
1717 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1719 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
/* Ring present => tpacket_rcv; ring removed => plain packet_rcv. */
1720 po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
1721 skb_queue_purge(&sk->receive_queue);
1723 if (atomic_read(&po->mapped))
1724 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
/* Reattach the (possibly changed) hook. */
1727 spin_lock(&po->bind_lock);
1729 dev_add_pack(&po->prot_hook);
1730 spin_unlock(&po->bind_lock);
/* Frees whichever vector was swapped out (or the new one on failure). */
1739 free_pg_vec(pg_vec, order, req->tp_block_nr);
/* mmap(2) backend: map the whole RX ring (all page blocks, contiguously)
 * into the caller's VMA.  The requested size must match the ring exactly.
 * NOTE(review): extraction dropped lines (error paths). */
1744 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1746 struct sock *sk = sock->sk;
1747 struct packet_opt *po = sk->protinfo.af_packet;
1749 unsigned long start;
1756 size = vma->vm_end - vma->vm_start;
1759 if (po->pg_vec == NULL)
1761 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1764 atomic_inc(&po->mapped);
1765 start = vma->vm_start;
1767 for (i=0; i<po->pg_vec_len; i++) {
1768 if (remap_page_range(start, __pa(po->pg_vec[i]),
1769 po->pg_vec_pages*PAGE_SIZE,
1772 start += po->pg_vec_pages*PAGE_SIZE;
/* Install the refcounting vm_ops so the ring outlives the fd if mapped. */
1774 vma->vm_ops = &packet_mmap_ops;
1784 #ifdef CONFIG_SOCK_PACKET
/* proto_ops for legacy SOCK_PACKET sockets: no setsockopt/getsockopt,
 * generic poll, and the spkt-specific bind/getname/sendmsg handlers. */
1785 struct proto_ops packet_ops_spkt = {
1788 release: packet_release,
1789 bind: packet_bind_spkt,
1790 connect: sock_no_connect,
1791 socketpair: sock_no_socketpair,
1792 accept: sock_no_accept,
1793 getname: packet_getname_spkt,
1794 poll: datagram_poll,
1795 ioctl: packet_ioctl,
1796 listen: sock_no_listen,
1797 shutdown: sock_no_shutdown,
1798 setsockopt: sock_no_setsockopt,
1799 getsockopt: sock_no_getsockopt,
1800 sendmsg: packet_sendmsg_spkt,
1801 recvmsg: packet_recvmsg,
1803 sendpage: sock_no_sendpage,
/* proto_ops for SOCK_RAW/SOCK_DGRAM AF_PACKET sockets: full sockopt
 * support plus the (possibly mmap-aware) poll/mmap handlers. */
1807 struct proto_ops packet_ops = {
1810 release: packet_release,
1812 connect: sock_no_connect,
1813 socketpair: sock_no_socketpair,
1814 accept: sock_no_accept,
1815 getname: packet_getname,
1817 ioctl: packet_ioctl,
1818 listen: sock_no_listen,
1819 shutdown: sock_no_shutdown,
1820 setsockopt: packet_setsockopt,
1821 getsockopt: packet_getsockopt,
1822 sendmsg: packet_sendmsg,
1823 recvmsg: packet_recvmsg,
1825 sendpage: sock_no_sendpage,
/* PF_PACKET family registration and the netdevice event notifier. */
1828 static struct net_proto_family packet_family_ops = {
1830 create: packet_create,
1833 static struct notifier_block packet_netdev_notifier = {
1834 notifier_call: packet_notifier,
1837 #ifdef CONFIG_PROC_FS
/* /proc/net/packet read handler: one header line, then one line per
 * socket with refcount, type, protocol, interface and queue stats.
 * Uses the classic offset/length windowing protocol of read_proc.
 * NOTE(review): extraction dropped lines in this body. */
1838 static int packet_read_proc(char *buffer, char **start, off_t offset,
1839 int length, int *eof, void *data)
1846 len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1848 read_lock(&packet_sklist_lock);
1850 for (s = packet_sklist; s; s = s->next) {
1851 len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
1853 atomic_read(&s->refcnt),
1856 s->protinfo.af_packet->ifindex,
1857 s->protinfo.af_packet->running,
1858 atomic_read(&s->rmem_alloc),
/* Stop once the requested window has been filled. */
1870 if(pos>offset+length)
1876 read_unlock(&packet_sklist_lock);
1877 *start=buffer+(offset-begin);
1878 len-=(offset-begin);
/* Module unload: tear down in reverse order of packet_init. */
1887 static void __exit packet_exit(void)
1889 remove_proc_entry("net/packet", 0);
1890 unregister_netdevice_notifier(&packet_netdev_notifier);
1891 sock_unregister(PF_PACKET);
/* Module init: register the PF_PACKET family, the netdev notifier, and
 * the /proc/net/packet entry. */
1895 static int __init packet_init(void)
1897 sock_register(&packet_family_ops);
1898 register_netdevice_notifier(&packet_netdev_notifier);
1899 #ifdef CONFIG_PROC_FS
1900 create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1905 module_init(packet_init);
1906 module_exit(packet_exit);
1907 MODULE_LICENSE("GPL");