/* Source: [linux-2.4.git] / net / core / netfilter.c
 * (import of upstream 2.4.34.4 from kernel.org) */
1 /* netfilter.c: look after the filters for various protocols. 
2  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3  *
4  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5  * way.
6  *
7  * Rusty Russell (C)2000 -- This code is GPL.
8  *
9  * February 2000: Modified by James Morris to have 1 queue per protocol.
10  * 15-Mar-2000:   Added NF_REPEAT --RR.
11  */
12 #include <linux/config.h>
13 #include <linux/netfilter.h>
14 #include <net/protocol.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/wait.h>
18 #include <linux/module.h>
19 #include <linux/interrupt.h>
20 #include <linux/if.h>
21 #include <linux/netdevice.h>
22 #include <linux/brlock.h>
23 #include <linux/inetdevice.h>
24 #include <net/sock.h>
25 #include <net/route.h>
26 #include <linux/ip.h>
27
28 #define __KERNEL_SYSCALLS__
29 #include <linux/unistd.h>
30
31 /* In this code, we can be waiting indefinitely for userspace to
32  * service a packet if a hook returns NF_QUEUE.  We could keep a count
33  * of skbuffs queued for userspace, and not deregister a hook unless
34  * this is zero, but that sucks.  Now, we simply check when the
35  * packets come back: if the hook is gone, the packet is discarded. */
/* Debug trace helper: forwards to printk() when CONFIG_NETFILTER_DEBUG is
 * defined and expands to nothing otherwise. */
#ifndef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)
#else
#define NFDEBUG(format, args...)  printk(format , ## args)
#endif
41
42 /* Sockopts only registered and called from user context, so
43    BR_NETPROTO_LOCK would be overkill.  Also, [gs]etsockopt calls may
44    sleep. */
45 static DECLARE_MUTEX(nf_sockopt_mutex);
46
47 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
48 static LIST_HEAD(nf_sockopts);
49
50 /* 
51  * A queue handler may be registered for each protocol.  Each is protected by
52  * long term mutex.  The handler must provide an an outfn() to accept packets
53  * for queueing and must reinject all packets it receives, no matter what.
54  */
55 static struct nf_queue_handler_t {
56         nf_queue_outfn_t outfn;
57         void *data;
58 } queue_handler[NPROTO];
59
60 int nf_register_hook(struct nf_hook_ops *reg)
61 {
62         struct list_head *i;
63
64         br_write_lock_bh(BR_NETPROTO_LOCK);
65         for (i = nf_hooks[reg->pf][reg->hooknum].next; 
66              i != &nf_hooks[reg->pf][reg->hooknum]; 
67              i = i->next) {
68                 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
69                         break;
70         }
71         list_add(&reg->list, i->prev);
72         br_write_unlock_bh(BR_NETPROTO_LOCK);
73         return 0;
74 }
75
76 void nf_unregister_hook(struct nf_hook_ops *reg)
77 {
78         br_write_lock_bh(BR_NETPROTO_LOCK);
79         list_del(&reg->list);
80         br_write_unlock_bh(BR_NETPROTO_LOCK);
81 }
82
/*
 * Do two ranges with exclusive upper bounds, [min1, max1) and
 * [min2, max2), share at least one value?  Returns 1 if so, 0 if not.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        /* The first range ends at or before the second one starts. */
        if (max1 <= min2)
                return 0;

        /* The first range starts at or after the second one ends. */
        if (min1 >= max2)
                return 0;

        return 1;
}
88
89 /* Functions to register sockopt ranges (exclusive). */
90 int nf_register_sockopt(struct nf_sockopt_ops *reg)
91 {
92         struct list_head *i;
93         int ret = 0;
94
95         if (down_interruptible(&nf_sockopt_mutex) != 0)
96                 return -EINTR;
97
98         for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
99                 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
100                 if (ops->pf == reg->pf
101                     && (overlap(ops->set_optmin, ops->set_optmax, 
102                                 reg->set_optmin, reg->set_optmax)
103                         || overlap(ops->get_optmin, ops->get_optmax, 
104                                    reg->get_optmin, reg->get_optmax))) {
105                         NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
106                                 ops->set_optmin, ops->set_optmax, 
107                                 ops->get_optmin, ops->get_optmax, 
108                                 reg->set_optmin, reg->set_optmax,
109                                 reg->get_optmin, reg->get_optmax);
110                         ret = -EBUSY;
111                         goto out;
112                 }
113         }
114
115         list_add(&reg->list, &nf_sockopts);
116 out:
117         up(&nf_sockopt_mutex);
118         return ret;
119 }
120
121 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
122 {
123         /* No point being interruptible: we're probably in cleanup_module() */
124  restart:
125         down(&nf_sockopt_mutex);
126         if (reg->use != 0) {
127                 /* To be woken by nf_sockopt call... */
128                 /* FIXME: Stuart Young's name appears gratuitously. */
129                 set_current_state(TASK_UNINTERRUPTIBLE);
130                 reg->cleanup_task = current;
131                 up(&nf_sockopt_mutex);
132                 schedule();
133                 goto restart;
134         }
135         list_del(&reg->list);
136         up(&nf_sockopt_mutex);
137 }
138
139 #ifdef CONFIG_NETFILTER_DEBUG
140 #include <net/ip.h>
141 #include <net/route.h>
142 #include <net/tcp.h>
143 #include <linux/netfilter_ipv4.h>
144
145 static void debug_print_hooks_ip(unsigned int nf_debug)
146 {
147         if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
148                 printk("PRE_ROUTING ");
149                 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
150         }
151         if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
152                 printk("LOCAL_IN ");
153                 nf_debug ^= (1 << NF_IP_LOCAL_IN);
154         }
155         if (nf_debug & (1 << NF_IP_FORWARD)) {
156                 printk("FORWARD ");
157                 nf_debug ^= (1 << NF_IP_FORWARD);
158         }
159         if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
160                 printk("LOCAL_OUT ");
161                 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
162         }
163         if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
164                 printk("POST_ROUTING ");
165                 nf_debug ^= (1 << NF_IP_POST_ROUTING);
166         }
167         if (nf_debug)
168                 printk("Crap bits: 0x%04X", nf_debug);
169         printk("\n");
170 }
171
172 void nf_dump_skb(int pf, struct sk_buff *skb)
173 {
174         printk("skb: pf=%i %s dev=%s len=%u\n", 
175                pf,
176                skb->sk ? "(owned)" : "(unowned)",
177                skb->dev ? skb->dev->name : "(no dev)",
178                skb->len);
179         switch (pf) {
180         case PF_INET: {
181                 const struct iphdr *ip = skb->nh.iph;
182                 __u32 *opt = (__u32 *) (ip + 1);
183                 int opti;
184                 __u16 src_port = 0, dst_port = 0;
185
186                 if (ip->protocol == IPPROTO_TCP
187                     || ip->protocol == IPPROTO_UDP) {
188                         struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
189                         src_port = ntohs(tcp->source);
190                         dst_port = ntohs(tcp->dest);
191                 }
192         
193                 printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
194                        " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
195                        ip->protocol, NIPQUAD(ip->saddr),
196                        src_port, NIPQUAD(ip->daddr),
197                        dst_port,
198                        ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
199                        ntohs(ip->frag_off), ip->ttl);
200
201                 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
202                         printk(" O=0x%8.8X", *opt++);
203                 printk("\n");
204         }
205         }
206 }
207
208 void nf_debug_ip_local_deliver(struct sk_buff *skb)
209 {
210         /* If it's a loopback packet, it must have come through
211          * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
212          * NF_IP_LOCAL_IN.  Otherwise, must have gone through
213          * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
214         if (!skb->dev) {
215                 printk("ip_local_deliver: skb->dev is NULL.\n");
216         }
217         else if (strcmp(skb->dev->name, "lo") == 0) {
218                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
219                                       | (1 << NF_IP_POST_ROUTING)
220                                       | (1 << NF_IP_PRE_ROUTING)
221                                       | (1 << NF_IP_LOCAL_IN))) {
222                         printk("ip_local_deliver: bad loopback skb: ");
223                         debug_print_hooks_ip(skb->nf_debug);
224                         nf_dump_skb(PF_INET, skb);
225                 }
226         }
227         else {
228                 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
229                                       | (1<<NF_IP_LOCAL_IN))) {
230                         printk("ip_local_deliver: bad non-lo skb: ");
231                         debug_print_hooks_ip(skb->nf_debug);
232                         nf_dump_skb(PF_INET, skb);
233                 }
234         }
235 }
236
237 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
238 {
239         if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
240                                  | (1 << NF_IP_POST_ROUTING))) {
241                 printk("ip_dev_loopback_xmit: bad owned skb = %p: ", 
242                        newskb);
243                 debug_print_hooks_ip(newskb->nf_debug);
244                 nf_dump_skb(PF_INET, newskb);
245         }
246         /* Clear to avoid confusing input check */
247         newskb->nf_debug = 0;
248 }
249
250 void nf_debug_ip_finish_output2(struct sk_buff *skb)
251 {
252         /* If it's owned, it must have gone through the
253          * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
254          * Otherwise, must have gone through
255          * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
256          */
257         if (skb->sk) {
258                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
259                                       | (1 << NF_IP_POST_ROUTING))) {
260                         printk("ip_finish_output: bad owned skb = %p: ", skb);
261                         debug_print_hooks_ip(skb->nf_debug);
262                         nf_dump_skb(PF_INET, skb);
263                 }
264         } else {
265                 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
266                                       | (1 << NF_IP_FORWARD)
267                                       | (1 << NF_IP_POST_ROUTING))) {
268                         /* Fragments, entunnelled packets, TCP RSTs
269                            generated by ipt_REJECT will have no
270                            owners, but still may be local */
271                         if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
272                                               | (1 << NF_IP_POST_ROUTING))){
273                                 printk("ip_finish_output:"
274                                        " bad unowned skb = %p: ",skb);
275                                 debug_print_hooks_ip(skb->nf_debug);
276                                 nf_dump_skb(PF_INET, skb);
277                         }
278                 }
279         }
280 }
281 #endif /*CONFIG_NETFILTER_DEBUG*/
282
283 /* Call get/setsockopt() */
284 static int nf_sockopt(struct sock *sk, int pf, int val, 
285                       char *opt, int *len, int get)
286 {
287         struct list_head *i;
288         struct nf_sockopt_ops *ops;
289         int ret;
290
291         if (down_interruptible(&nf_sockopt_mutex) != 0)
292                 return -EINTR;
293
294         for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
295                 ops = (struct nf_sockopt_ops *)i;
296                 if (ops->pf == pf) {
297                         if (get) {
298                                 if (val >= ops->get_optmin
299                                     && val < ops->get_optmax) {
300                                         ops->use++;
301                                         up(&nf_sockopt_mutex);
302                                         ret = ops->get(sk, val, opt, len);
303                                         goto out;
304                                 }
305                         } else {
306                                 if (val >= ops->set_optmin
307                                     && val < ops->set_optmax) {
308                                         ops->use++;
309                                         up(&nf_sockopt_mutex);
310                                         ret = ops->set(sk, val, opt, *len);
311                                         goto out;
312                                 }
313                         }
314                 }
315         }
316         up(&nf_sockopt_mutex);
317         return -ENOPROTOOPT;
318         
319  out:
320         down(&nf_sockopt_mutex);
321         ops->use--;
322         if (ops->cleanup_task)
323                 wake_up_process(ops->cleanup_task);
324         up(&nf_sockopt_mutex);
325         return ret;
326 }
327
/*
 * setsockopt() entry point: dispatch option @val of family @pf to the
 * registered handler's set() callback.  Returns whatever nf_sockopt()
 * returns.
 */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
                  int len)
{
        /* nf_sockopt() takes the length by pointer for both directions. */
        int optlen = len;

        return nf_sockopt(sk, pf, val, opt, &optlen, 0);
}
333
/*
 * getsockopt() entry point: dispatch option @val of family @pf to the
 * registered handler's get() callback.  Returns whatever nf_sockopt()
 * returns.
 */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
        const int is_get = 1;

        return nf_sockopt(sk, pf, val, opt, len, is_get);
}
338
339 static unsigned int nf_iterate(struct list_head *head,
340                                struct sk_buff **skb,
341                                int hook,
342                                const struct net_device *indev,
343                                const struct net_device *outdev,
344                                struct list_head **i,
345                                int (*okfn)(struct sk_buff *))
346 {
347         for (*i = (*i)->next; *i != head; *i = (*i)->next) {
348                 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
349                 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
350                 case NF_QUEUE:
351                         return NF_QUEUE;
352
353                 case NF_STOLEN:
354                         return NF_STOLEN;
355
356                 case NF_DROP:
357                         return NF_DROP;
358
359                 case NF_REPEAT:
360                         *i = (*i)->prev;
361                         break;
362
363 #ifdef CONFIG_NETFILTER_DEBUG
364                 case NF_ACCEPT:
365                         break;
366
367                 default:
368                         NFDEBUG("Evil return from %p(%u).\n", 
369                                 elem->hook, hook);
370 #endif
371                 }
372         }
373         return NF_ACCEPT;
374 }
375
376 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
377 {      
378         int ret;
379
380         br_write_lock_bh(BR_NETPROTO_LOCK);
381         if (queue_handler[pf].outfn)
382                 ret = -EBUSY;
383         else {
384                 queue_handler[pf].outfn = outfn;
385                 queue_handler[pf].data = data;
386                 ret = 0;
387         }
388         br_write_unlock_bh(BR_NETPROTO_LOCK);
389
390         return ret;
391 }
392
393 /* The caller must flush their queue before this */
394 int nf_unregister_queue_handler(int pf)
395 {
396         br_write_lock_bh(BR_NETPROTO_LOCK);
397         queue_handler[pf].outfn = NULL;
398         queue_handler[pf].data = NULL;
399         br_write_unlock_bh(BR_NETPROTO_LOCK);
400         return 0;
401 }
402
403 /* 
404  * Any packet that leaves via this function must come back 
405  * through nf_reinject().
406  */
407 static void nf_queue(struct sk_buff *skb, 
408                      struct list_head *elem, 
409                      int pf, unsigned int hook,
410                      struct net_device *indev,
411                      struct net_device *outdev,
412                      int (*okfn)(struct sk_buff *))
413 {
414         int status;
415         struct nf_info *info;
416
417         if (!queue_handler[pf].outfn) {
418                 kfree_skb(skb);
419                 return;
420         }
421
422         info = kmalloc(sizeof(*info), GFP_ATOMIC);
423         if (!info) {
424                 if (net_ratelimit())
425                         printk(KERN_ERR "OOM queueing packet %p\n",
426                                skb);
427                 kfree_skb(skb);
428                 return;
429         }
430
431         *info = (struct nf_info) { 
432                 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
433
434         /* Bump dev refs so they don't vanish while packet is out */
435         if (indev) dev_hold(indev);
436         if (outdev) dev_hold(outdev);
437
438         status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
439         if (status < 0) {
440                 /* James M doesn't say fuck enough. */
441                 if (indev) dev_put(indev);
442                 if (outdev) dev_put(outdev);
443                 kfree(info);
444                 kfree_skb(skb);
445                 return;
446         }
447 }
448
449 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
450                  struct net_device *indev,
451                  struct net_device *outdev,
452                  int (*okfn)(struct sk_buff *))
453 {
454         struct list_head *elem;
455         unsigned int verdict;
456         int ret = 0;
457
458         /* This stopgap cannot be removed until all the hooks are audited. */
459         if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
460                 kfree_skb(skb);
461                 return -ENOMEM;
462         }
463         if (skb->ip_summed == CHECKSUM_HW) {
464                 if (outdev == NULL) {
465                         skb->ip_summed = CHECKSUM_NONE;
466                 } else {
467                         skb_checksum_help(skb);
468                 }
469         }
470
471         /* We may already have this, but read-locks nest anyway */
472         br_read_lock_bh(BR_NETPROTO_LOCK);
473
474 #ifdef CONFIG_NETFILTER_DEBUG
475         if (skb->nf_debug & (1 << hook)) {
476                 printk("nf_hook: hook %i already set.\n", hook);
477                 nf_dump_skb(pf, skb);
478         }
479         skb->nf_debug |= (1 << hook);
480 #endif
481
482         elem = &nf_hooks[pf][hook];
483         verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
484                              outdev, &elem, okfn);
485         if (verdict == NF_QUEUE) {
486                 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
487                 nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
488         }
489
490         switch (verdict) {
491         case NF_ACCEPT:
492                 ret = okfn(skb);
493                 break;
494
495         case NF_DROP:
496                 kfree_skb(skb);
497                 ret = -EPERM;
498                 break;
499         }
500
501         br_read_unlock_bh(BR_NETPROTO_LOCK);
502         return ret;
503 }
504
505 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
506                  unsigned int verdict)
507 {
508         struct list_head *elem = &info->elem->list;
509         struct list_head *i;
510
511         /* We don't have BR_NETPROTO_LOCK here */
512         br_read_lock_bh(BR_NETPROTO_LOCK);
513         for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
514                 if (i == &nf_hooks[info->pf][info->hook]) {
515                         /* The module which sent it to userspace is gone. */
516                         NFDEBUG("%s: module disappeared, dropping packet.\n",
517                                  __FUNCTION__);
518                         verdict = NF_DROP;
519                         break;
520                 }
521         }
522
523         /* Continue traversal iff userspace said ok... */
524         if (verdict == NF_REPEAT) {
525                 elem = elem->prev;
526                 verdict = NF_ACCEPT;
527         }
528
529         if (verdict == NF_ACCEPT) {
530                 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
531                                      &skb, info->hook, 
532                                      info->indev, info->outdev, &elem,
533                                      info->okfn);
534         }
535
536         switch (verdict) {
537         case NF_ACCEPT:
538                 info->okfn(skb);
539                 break;
540
541         case NF_QUEUE:
542                 nf_queue(skb, elem, info->pf, info->hook, 
543                          info->indev, info->outdev, info->okfn);
544                 break;
545
546         case NF_DROP:
547                 kfree_skb(skb);
548                 break;
549         }
550         br_read_unlock_bh(BR_NETPROTO_LOCK);
551
552         /* Release those devices we held, or Alexey will kill me. */
553         if (info->indev) dev_put(info->indev);
554         if (info->outdev) dev_put(info->outdev);
555         
556         kfree(info);
557         return;
558 }
559
560 #ifdef CONFIG_INET
561 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
562 int ip_route_me_harder(struct sk_buff **pskb)
563 {
564         struct iphdr *iph = (*pskb)->nh.iph;
565         struct rtable *rt;
566         struct rt_key key = {};
567         struct dst_entry *odst;
568         unsigned int hh_len;
569
570         /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
571          * packets with foreign saddr to be appear on the NF_IP_LOCAL_OUT hook.
572          */
573         if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
574                 key.dst = iph->daddr;
575                 key.src = iph->saddr;
576                 key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
577                 key.tos = RT_TOS(iph->tos);
578 #ifdef CONFIG_IP_ROUTE_FWMARK
579                 key.fwmark = (*pskb)->nfmark;
580 #endif
581                 if (ip_route_output_key(&rt, &key) != 0)
582                         return -1;
583
584                 /* Drop old route. */
585                 dst_release((*pskb)->dst);
586                 (*pskb)->dst = &rt->u.dst;
587         } else {
588                 /* non-local src, find valid iif to satisfy
589                  * rp-filter when calling ip_route_input. */
590                 key.dst = iph->saddr;
591                 if (ip_route_output_key(&rt, &key) != 0)
592                         return -1;
593
594                 odst = (*pskb)->dst;
595                 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
596                                    RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
597                         dst_release(&rt->u.dst);
598                         return -1;
599                 }
600                 dst_release(&rt->u.dst);
601                 dst_release(odst);
602         }
603         
604         if ((*pskb)->dst->error)
605                 return -1;
606
607         /* Change in oif may mean change in hh_len. */
608         hh_len = (*pskb)->dst->dev->hard_header_len;
609         if (skb_headroom(*pskb) < hh_len) {
610                 struct sk_buff *nskb;
611
612                 nskb = skb_realloc_headroom(*pskb, hh_len);
613                 if (!nskb)
614                         return -1;
615                 if ((*pskb)->sk)
616                         skb_set_owner_w(nskb, (*pskb)->sk);
617                 kfree_skb(*pskb);
618                 *pskb = nskb;
619         }
620
621         return 0;
622 }
623 #endif /*CONFIG_INET*/
624
625 /* This does not belong here, but locally generated errors need it if connection
626    tracking in use: without this, connection may not be in hash table, and hence
627    manufactured ICMP or RST packets will not be associated with it. */
628 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
629
630 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
631 {
632         void (*attach)(struct sk_buff *, struct nf_ct_info *);
633
634         if (skb->nfct && (attach = ip_ct_attach) != NULL) {
635                 mb(); /* Just to be sure: must be read before executing this */
636                 attach(new, skb->nfct);
637         }
638 }
639
640 void __init netfilter_init(void)
641 {
642         int i, h;
643
644         for (i = 0; i < NPROTO; i++) {
645                 for (h = 0; h < NF_MAX_HOOKS; h++)
646                         INIT_LIST_HEAD(&nf_hooks[i][h]);
647         }
648 }