1 /* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
7 * Rusty Russell (C)2000 -- This code is GPL.
9 * February 2000: Modified by James Morris to have 1 queue per protocol.
10 * 15-Mar-2000: Added NF_REPEAT --RR.
12 #include <linux/config.h>
13 #include <linux/netfilter.h>
14 #include <net/protocol.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/wait.h>
18 #include <linux/module.h>
19 #include <linux/interrupt.h>
21 #include <linux/netdevice.h>
22 #include <linux/brlock.h>
23 #include <linux/inetdevice.h>
25 #include <net/route.h>
28 #define __KERNEL_SYSCALLS__
29 #include <linux/unistd.h>
31 /* In this code, we can be waiting indefinitely for userspace to
32 * service a packet if a hook returns NF_QUEUE. We could keep a count
33 * of skbuffs queued for userspace, and not deregister a hook unless
34 * this is zero, but that sucks. Now, we simply check when the
35 * packets come back: if the hook is gone, the packet is discarded. */
36 #ifdef CONFIG_NETFILTER_DEBUG
37 #define NFDEBUG(format, args...) printk(format , ## args)
39 #define NFDEBUG(format, args...)
42 /* Sockopts only registered and called from user context, so
43 BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
/* File-scope state shared by the functions in this file:
 *  - nf_sockopt_mutex: taken around every walk or change of nf_sockopts.
 *  - nf_hooks: one list head per (protocol family, hook number) pair;
 *    nf_register_hook() links hook entries into these lists.
 *  - nf_sockopts: list of registered struct nf_sockopt_ops entries. */
45 static DECLARE_MUTEX(nf_sockopt_mutex);
47 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
48 static LIST_HEAD(nf_sockopts);
51 * A queue handler may be registered for each protocol. Each is protected by
52 * long term mutex. The handler must provide an outfn() to accept packets
53 * for queueing and must reinject all packets it receives, no matter what.
/* One queue handler slot per protocol family (array index = pf).
 * outfn is the callback that nf_queue() hands packets to; a NULL outfn
 * means no handler is registered for that family.
 * NOTE(review): other code in this file assigns a data member of this
 * struct, but its declaration is not visible in this listing. */
55 static struct nf_queue_handler_t {
56 nf_queue_outfn_t outfn;
58 } queue_handler[NPROTO];
/* Register a hook.
 * With BR_NETPROTO_LOCK held for writing, the list
 * nf_hooks[reg->pf][reg->hooknum] is walked while reg->priority is compared
 * against the priority of each existing entry; reg->list is then linked in
 * directly after i->prev, i.e. in front of the entry the walk stopped at.
 * NOTE(review): the loop increment, the statement that ends the walk, the
 * declaration of i and the return statement are not visible in this
 * listing -- confirm against the full file. */
60 int nf_register_hook(struct nf_hook_ops *reg)
64 br_write_lock_bh(BR_NETPROTO_LOCK);
65 for (i = nf_hooks[reg->pf][reg->hooknum].next;
66 i != &nf_hooks[reg->pf][reg->hooknum];
68 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
71 list_add(&reg->list, i->prev);
72 br_write_unlock_bh(BR_NETPROTO_LOCK);
/* Unregister a hook.  The work on reg is bracketed by taking and releasing
 * BR_NETPROTO_LOCK for writing.
 * NOTE(review): the statement between lock and unlock (presumably the
 * removal of reg from its hook list) is not visible in this listing. */
76 void nf_unregister_hook(struct nf_hook_ops *reg)
78 br_write_lock_bh(BR_NETPROTO_LOCK);
80 br_write_unlock_bh(BR_NETPROTO_LOCK);
/* Do two ranges with exclusive upper bounds overlap?
 *
 * Each range is [min, max): min is part of the range, max is not.  Two
 * ranges that merely touch (max1 == min2 or max2 == min1) therefore do
 * not overlap.
 *
 * Returns 1 when the ranges share at least one value, 0 otherwise. */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
/* Register a block of socket options described by reg.
 * The wait for nf_sockopt_mutex is interruptible.  With the mutex held,
 * every entry already on nf_sockopts that has the same pf is checked with
 * overlap(): a clash of either the set range or the get range is reported
 * through NFDEBUG.  reg->list is then linked onto nf_sockopts and the mutex
 * is released.
 * NOTE(review): the return statements (interrupted wait, overlap found,
 * success) and the declarations of the locals are not visible in this
 * listing -- confirm against the full file. */
89 /* Functions to register sockopt ranges (exclusive). */
90 int nf_register_sockopt(struct nf_sockopt_ops *reg)
95 if (down_interruptible(&nf_sockopt_mutex) != 0)
98 for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
99 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
100 if (ops->pf == reg->pf
101 && (overlap(ops->set_optmin, ops->set_optmax,
102 reg->set_optmin, reg->set_optmax)
103 || overlap(ops->get_optmin, ops->get_optmax,
104 reg->get_optmin, reg->get_optmax))) {
105 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
106 ops->set_optmin, ops->set_optmax,
107 ops->get_optmin, ops->get_optmax,
108 reg->set_optmin, reg->set_optmax,
109 reg->get_optmin, reg->get_optmax);
115 list_add(&reg->list, &nf_sockopts);
117 up(&nf_sockopt_mutex);
/* Remove reg from nf_sockopts.
 * nf_sockopt_mutex is taken with the non-interruptible down().  On the
 * visible wait path the caller marks itself TASK_UNINTERRUPTIBLE, records
 * itself in reg->cleanup_task and drops the mutex; nf_sockopt() calls
 * wake_up_process() on a non-NULL cleanup_task, which is what resumes it.
 * reg->list is finally unlinked with the mutex held.
 * NOTE(review): the condition that selects the wait path, the call that
 * actually sleeps and the jump back to retry are not visible in this
 * listing -- confirm against the full file. */
121 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
123 /* No point being interruptible: we're probably in cleanup_module() */
125 down(&nf_sockopt_mutex);
127 /* To be woken by nf_sockopt call... */
128 /* FIXME: Stuart Young's name appears gratuitously. */
129 set_current_state(TASK_UNINTERRUPTIBLE);
130 reg->cleanup_task = current;
131 up(&nf_sockopt_mutex);
135 list_del(&reg->list);
136 up(&nf_sockopt_mutex);
139 #ifdef CONFIG_NETFILTER_DEBUG
141 #include <net/route.h>
143 #include <linux/netfilter_ipv4.h>
/* Print the IPv4 hooks recorded in an nf_debug bitmask.
 * Bit (1 << NF_IP_x) stands for hook x.  Each recognised bit is tested and
 * then cleared from the local copy with XOR, so whatever remains at the end
 * is a set of unknown bits, printed in hex as the final item.
 * NOTE(review): the printk calls for LOCAL_IN and FORWARD and any guard
 * around the final printk are not visible in this listing. */
145 static void debug_print_hooks_ip(unsigned int nf_debug)
147 if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
148 printk("PRE_ROUTING ");
149 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
151 if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
153 nf_debug ^= (1 << NF_IP_LOCAL_IN);
155 if (nf_debug & (1 << NF_IP_FORWARD)) {
157 nf_debug ^= (1 << NF_IP_FORWARD);
159 if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
160 printk("LOCAL_OUT ");
161 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
163 if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
164 printk("POST_ROUTING ");
165 nf_debug ^= (1 << NF_IP_POST_ROUTING);
168 printk("Crap bits: 0x%04X", nf_debug);
/* Debug dump of a packet.
 * First line: protocol family, whether the skb has an owning socket, the
 * device name (or a placeholder when skb->dev is NULL) and a length.
 * IP part: protocol, source and destination address with port, total
 * length, TOS, id, fragment field and TTL, followed by every 32-bit option
 * word in hex.  Ports stay 0 unless the protocol is TCP or UDP; for both
 * of those they are read through a struct tcphdr pointer placed ip->ihl
 * 32-bit words after the start of the IP header.
 * NOTE(review): the option-loop bound subtracts a sizeof-based value, so it
 * is computed as an unsigned size; an ihl below 5 would wrap to a huge
 * count -- confirm callers only pass headers with a validated ihl.
 * NOTE(review): the condition selecting the IP part (presumably on pf) and
 * the declaration of opti are not visible in this listing. */
172 void nf_dump_skb(int pf, struct sk_buff *skb)
174 printk("skb: pf=%i %s dev=%s len=%u\n",
176 skb->sk ? "(owned)" : "(unowned)",
177 skb->dev ? skb->dev->name : "(no dev)",
181 const struct iphdr *ip = skb->nh.iph;
182 __u32 *opt = (__u32 *) (ip + 1);
184 __u16 src_port = 0, dst_port = 0;
186 if (ip->protocol == IPPROTO_TCP
187 || ip->protocol == IPPROTO_UDP) {
188 struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
189 src_port = ntohs(tcp->source);
190 dst_port = ntohs(tcp->dest);
193 printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
194 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
195 ip->protocol, NIPQUAD(ip->saddr),
196 src_port, NIPQUAD(ip->daddr),
198 ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
199 ntohs(ip->frag_off), ip->ttl);
201 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
202 printk(" O=0x%8.8X", *opt++);
/* Debug check of skb->nf_debug at local delivery.
 * As coded: a packet whose device is named lo must carry exactly the bits
 * LOCAL_OUT | POST_ROUTING | PRE_ROUTING | LOCAL_IN; any other packet must
 * carry exactly PRE_ROUTING | LOCAL_IN.  A mismatch prints a message, the
 * recorded hooks and a packet dump.  A NULL skb->dev is reported as well.
 * NOTE(review): the comment below names NF_IP_RAW_INPUT, which the visible
 * code never tests, and omits POST_ROUTING and LOCAL_IN from the respective
 * masks; the code is the authority here. */
208 void nf_debug_ip_local_deliver(struct sk_buff *skb)
210 /* If it's a loopback packet, it must have come through
211 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
212 * NF_IP_LOCAL_IN. Otherwise, must have gone through
213 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
215 printk("ip_local_deliver: skb->dev is NULL.\n");
217 else if (strcmp(skb->dev->name, "lo") == 0) {
218 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
219 | (1 << NF_IP_POST_ROUTING)
220 | (1 << NF_IP_PRE_ROUTING)
221 | (1 << NF_IP_LOCAL_IN))) {
222 printk("ip_local_deliver: bad loopback skb: ");
223 debug_print_hooks_ip(skb->nf_debug);
224 nf_dump_skb(PF_INET, skb);
228 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
229 | (1<<NF_IP_LOCAL_IN))) {
230 printk("ip_local_deliver: bad non-lo skb: ");
231 debug_print_hooks_ip(skb->nf_debug);
232 nf_dump_skb(PF_INET, skb);
/* Debug check for a packet being looped back.
 * newskb->nf_debug must be exactly LOCAL_OUT | POST_ROUTING; otherwise a
 * message, the recorded hooks and a packet dump are printed.  The mask is
 * then reset to 0 so that the checks on the receive side start clean. */
237 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
239 if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
240 | (1 << NF_IP_POST_ROUTING))) {
241 printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
243 debug_print_hooks_ip(newskb->nf_debug);
244 nf_dump_skb(PF_INET, newskb);
246 /* Clear to avoid confusing input check */
247 newskb->nf_debug = 0;
/* Debug check of skb->nf_debug on the output path.
 * Two cases are visible.  In the first the mask must be exactly
 * LOCAL_OUT | POST_ROUTING, else the packet is reported as a bad owned skb.
 * In the second the mask must be exactly
 * PRE_ROUTING | FORWARD | POST_ROUTING, with LOCAL_OUT | POST_ROUTING
 * accepted as a fallback; anything else is reported as a bad unowned skb.
 * Every report prints the recorded hooks and a packet dump.
 * NOTE(review): the condition that separates the two cases (presumably
 * whether the skb has an owning socket) is not visible in this listing. */
250 void nf_debug_ip_finish_output2(struct sk_buff *skb)
252 /* If it's owned, it must have gone through the
253 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
254 * Otherwise, must have gone through
255 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
258 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
259 | (1 << NF_IP_POST_ROUTING))) {
260 printk("ip_finish_output: bad owned skb = %p: ", skb);
261 debug_print_hooks_ip(skb->nf_debug);
262 nf_dump_skb(PF_INET, skb);
265 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
266 | (1 << NF_IP_FORWARD)
267 | (1 << NF_IP_POST_ROUTING))) {
268 /* Fragments, entunnelled packets, TCP RSTs
269 generated by ipt_REJECT will have no
270 owners, but still may be local */
271 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
272 | (1 << NF_IP_POST_ROUTING))){
273 printk("ip_finish_output:"
274 " bad unowned skb = %p: ",skb);
275 debug_print_hooks_ip(skb->nf_debug);
276 nf_dump_skb(PF_INET, skb);
281 #endif /*CONFIG_NETFILTER_DEBUG*/
/* Common dispatcher behind nf_getsockopt() and nf_setsockopt().
 * Walks nf_sockopts under nf_sockopt_mutex (interruptible wait) looking
 * for an entry whose range contains val: get_optmin <= val < get_optmax
 * on the get side, set_optmin <= val < set_optmax on the set side.
 * The mutex is released BEFORE the handler is called, so ops->get and
 * ops->set run unlocked; get receives the len pointer, set receives the
 * value *len.  After the handler returns the mutex is re-taken and a
 * non-NULL ops->cleanup_task is woken with wake_up_process().
 * NOTE(review): the pf comparison, the get/set selection, whatever keeps
 * ops alive while the handler runs unlocked, and the return statements are
 * not visible in this listing -- confirm against the full file. */
283 /* Call get/setsockopt() */
284 static int nf_sockopt(struct sock *sk, int pf, int val,
285 char *opt, int *len, int get)
288 struct nf_sockopt_ops *ops;
291 if (down_interruptible(&nf_sockopt_mutex) != 0)
294 for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
295 ops = (struct nf_sockopt_ops *)i;
298 if (val >= ops->get_optmin
299 && val < ops->get_optmax) {
301 up(&nf_sockopt_mutex);
302 ret = ops->get(sk, val, opt, len);
306 if (val >= ops->set_optmin
307 && val < ops->set_optmax) {
309 up(&nf_sockopt_mutex);
310 ret = ops->set(sk, val, opt, *len);
316 up(&nf_sockopt_mutex);
320 down(&nf_sockopt_mutex);
322 if (ops->cleanup_task)
323 wake_up_process(ops->cleanup_task);
324 up(&nf_sockopt_mutex);
/* Set-side entry point: forwards to nf_sockopt() with get == 0, passing
 * the address of len, and returns its result.
 * NOTE(review): the declaration of len (presumably the final parameter) is
 * not visible in this listing. */
328 int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
331 return nf_sockopt(sk, pf, val, opt, &len, 0);
/* Get-side entry point: forwards all arguments to nf_sockopt() with
 * get == 1 and returns its result.  len is passed through as a pointer. */
334 int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
336 return nf_sockopt(sk, pf, val, opt, len, 1);
/* Call the hooks on one hook list in order.
 * Iteration starts at the entry AFTER *i and stops when it reaches head.
 * *i is advanced in place, so after a return the caller can see which
 * entry the walk stopped at (nf_hook_slow() passes that on to nf_queue()).
 * Each entry is treated as a struct nf_hook_ops and its hook function is
 * called with (hook, skb, indev, outdev, okfn); the result drives a switch.
 * NOTE(review): the hook parameter declaration, the switch cases and the
 * return statements are not visible in this listing. */
339 static unsigned int nf_iterate(struct list_head *head,
340 struct sk_buff **skb,
342 const struct net_device *indev,
343 const struct net_device *outdev,
344 struct list_head **i,
345 int (*okfn)(struct sk_buff *))
347 for (*i = (*i)->next; *i != head; *i = (*i)->next) {
348 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
349 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
363 #ifdef CONFIG_NETFILTER_DEBUG
368 NFDEBUG("Evil return from %p(%u).\n",
/* Install the queue handler for protocol family pf.
 * With BR_NETPROTO_LOCK held for writing, queue_handler[pf].outfn is
 * tested first (a handler may already be present), and outfn and data are
 * stored in the slot.
 * NOTE(review): what happens when a handler is already present, and the
 * return values, are not visible in this listing. */
376 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
380 br_write_lock_bh(BR_NETPROTO_LOCK);
381 if (queue_handler[pf].outfn)
384 queue_handler[pf].outfn = outfn;
385 queue_handler[pf].data = data;
388 br_write_unlock_bh(BR_NETPROTO_LOCK);
/* Remove the queue handler for protocol family pf by resetting both outfn
 * and data of its slot to NULL under the BR_NETPROTO_LOCK write lock.
 * No check is made that a handler was registered.
 * NOTE(review): the return statement is not visible in this listing. */
393 /* The caller must flush their queue before this */
394 int nf_unregister_queue_handler(int pf)
396 br_write_lock_bh(BR_NETPROTO_LOCK);
397 queue_handler[pf].outfn = NULL;
398 queue_handler[pf].data = NULL;
399 br_write_unlock_bh(BR_NETPROTO_LOCK);
404 * Any packet that leaves via this function must come back
405 * through nf_reinject().
/* Hand a packet to the queue handler registered for pf.
 * Steps visible here:
 *  - test whether queue_handler[pf].outfn is set at all;
 *  - allocate a struct nf_info with GFP_ATOMIC (an OOM message exists for
 *    the failure case) and fill it with the hook entry elem, pf, hook,
 *    indev, outdev and okfn, in that member order;
 *  - take a reference on each non-NULL device with dev_hold();
 *  - call outfn(skb, info, data) and keep its result in status;
 *  - a later path drops the two device references again with dev_put().
 * NOTE(review): what is done when no handler is set or the allocation
 * fails, and the test on status that guards the dev_put() pair, are not
 * visible in this listing -- confirm against the full file. */
407 static void nf_queue(struct sk_buff *skb,
408 struct list_head *elem,
409 int pf, unsigned int hook,
410 struct net_device *indev,
411 struct net_device *outdev,
412 int (*okfn)(struct sk_buff *))
415 struct nf_info *info;
417 if (!queue_handler[pf].outfn) {
422 info = kmalloc(sizeof(*info), GFP_ATOMIC);
425 printk(KERN_ERR "OOM queueing packet %p\n",
431 *info = (struct nf_info) {
432 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
434 /* Bump dev refs so they don't vanish while packet is out */
435 if (indev) dev_hold(indev);
436 if (outdev) dev_hold(outdev);
438 status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
440 /* James M doesn't say fuck enough. */
441 if (indev) dev_put(indev);
442 if (outdev) dev_put(outdev);
/* Run the hooks registered for (pf, hook) on skb.
 * Before the walk:
 *  - a nonlinear skb is linearized with GFP_ATOMIC; a non-zero result from
 *    skb_linearize() takes a failure branch;
 *  - for ip_summed == CHECKSUM_HW, a NULL outdev resets ip_summed to
 *    CHECKSUM_NONE, and skb_checksum_help() is called on another branch.
 * The walk itself runs under the BR_NETPROTO_LOCK read lock: elem starts
 * at the list head, nf_iterate() advances it, and an NF_QUEUE verdict
 * passes the packet and the stopping entry to nf_queue().
 * With CONFIG_NETFILTER_DEBUG the bit (1 << hook) is recorded in
 * skb->nf_debug, and a packet that already carries that bit is reported.
 * NOTE(review): the failure branch bodies, the handling of the other
 * verdicts and the return value are not visible in this listing. */
449 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
450 struct net_device *indev,
451 struct net_device *outdev,
452 int (*okfn)(struct sk_buff *))
454 struct list_head *elem;
455 unsigned int verdict;
458 /* This stopgap cannot be removed until all the hooks are audited. */
459 if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
463 if (skb->ip_summed == CHECKSUM_HW) {
464 if (outdev == NULL) {
465 skb->ip_summed = CHECKSUM_NONE;
467 skb_checksum_help(skb);
471 /* We may already have this, but read-locks nest anyway */
472 br_read_lock_bh(BR_NETPROTO_LOCK);
474 #ifdef CONFIG_NETFILTER_DEBUG
475 if (skb->nf_debug & (1 << hook)) {
476 printk("nf_hook: hook %i already set.\n", hook);
477 nf_dump_skb(pf, skb);
479 skb->nf_debug |= (1 << hook);
482 elem = &nf_hooks[pf][hook];
483 verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
484 outdev, &elem, okfn);
485 if (verdict == NF_QUEUE) {
486 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
487 nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
501 br_read_unlock_bh(BR_NETPROTO_LOCK);
/* Return path for a packet that was handed to a queue handler, together
 * with the verdict chosen for it.
 * Under the BR_NETPROTO_LOCK read lock the hook list for
 * (info->pf, info->hook) is scanned for info->elem.  Arriving back at the
 * list head without finding it means the hook that queued the packet is no
 * longer registered; the debug message for that case says the packet is
 * dropped.
 * NF_REPEAT and NF_ACCEPT are then examined in that order.  On NF_ACCEPT
 * the walk continues through nf_iterate() from elem, and the packet can be
 * queued again through nf_queue().
 * At the end the references on info->indev and info->outdev are dropped
 * with dev_put(), matching the dev_hold() calls made in nf_queue().
 * NOTE(review): the bodies of the NF_REPEAT and vanished-hook branches,
 * the final verdict handling and the release of info are not visible in
 * this listing -- confirm against the full file. */
505 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
506 unsigned int verdict)
508 struct list_head *elem = &info->elem->list;
511 /* We don't have BR_NETPROTO_LOCK here */
512 br_read_lock_bh(BR_NETPROTO_LOCK);
513 for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
514 if (i == &nf_hooks[info->pf][info->hook]) {
515 /* The module which sent it to userspace is gone. */
516 NFDEBUG("%s: module disappeared, dropping packet.\n",
523 /* Continue traversal iff userspace said ok... */
524 if (verdict == NF_REPEAT) {
529 if (verdict == NF_ACCEPT) {
530 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
532 info->indev, info->outdev, &elem,
542 nf_queue(skb, elem, info->pf, info->hook,
543 info->indev, info->outdev, info->okfn);
550 br_read_unlock_bh(BR_NETPROTO_LOCK);
552 /* Release those devices we held, or Alexey will kill me. */
553 if (info->indev) dev_put(info->indev);
554 if (info->outdev) dev_put(info->outdev);
/* Recompute the route of *pskb from its current IP header.
 *  - Local source address (inet_addr_type() == RTN_LOCAL): an output route
 *    lookup keyed on daddr, saddr, the bound device of the owning socket
 *    (0 without a socket), the TOS and, with CONFIG_IP_ROUTE_FWMARK, the
 *    nfmark.  The old dst is released and replaced by the new route.
 *  - Any other source address: an output lookup towards saddr is made only
 *    to obtain a device, which is handed to ip_route_input() as the input
 *    device; the temporary route is released on both visible paths.
 * Afterwards dst->error is checked, and if the new device needs more
 * link-layer headroom than the skb has, the skb is reallocated with
 * skb_realloc_headroom() and the socket ownership is carried over.
 * pskb is a pointer to the skb pointer.
 * NOTE(review): the declarations of rt and hh_len, the error returns and
 * the code that swaps the reallocated skb into *pskb are not visible in
 * this listing -- confirm against the full file. */
561 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
562 int ip_route_me_harder(struct sk_buff **pskb)
564 struct iphdr *iph = (*pskb)->nh.iph;
566 struct rt_key key = {};
567 struct dst_entry *odst;
570 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
571 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
573 if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
574 key.dst = iph->daddr;
575 key.src = iph->saddr;
576 key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
577 key.tos = RT_TOS(iph->tos);
578 #ifdef CONFIG_IP_ROUTE_FWMARK
579 key.fwmark = (*pskb)->nfmark;
581 if (ip_route_output_key(&rt, &key) != 0)
584 /* Drop old route. */
585 dst_release((*pskb)->dst);
586 (*pskb)->dst = &rt->u.dst;
588 /* non-local src, find valid iif to satisfy
589 * rp-filter when calling ip_route_input. */
590 key.dst = iph->saddr;
591 if (ip_route_output_key(&rt, &key) != 0)
595 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
596 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
597 dst_release(&rt->u.dst);
600 dst_release(&rt->u.dst);
604 if ((*pskb)->dst->error)
607 /* Change in oif may mean change in hh_len. */
608 hh_len = (*pskb)->dst->dev->hard_header_len;
609 if (skb_headroom(*pskb) < hh_len) {
610 struct sk_buff *nskb;
612 nskb = skb_realloc_headroom(*pskb, hh_len);
616 skb_set_owner_w(nskb, (*pskb)->sk);
623 #endif /*CONFIG_INET*/
625 /* This does not belong here, but locally generated errors need it if connection
626 tracking in use: without this, connection may not be in hash table, and hence
627 manufactured ICMP or RST packets will not be associated with it. */
628 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
/* Attach the conntrack info of skb to the packet new.
 * Does nothing unless skb->nfct is set and the ip_ct_attach pointer is
 * non-NULL.  The pointer is copied into a local once and the call is made
 * through that copy, so the global is read a single time; a memory barrier
 * is issued between the read and the call. */
630 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
632 void (*attach)(struct sk_buff *, struct nf_ct_info *);
634 if (skb->nfct && (attach = ip_ct_attach) != NULL) {
635 mb(); /* Just to be sure: must be read before executing this */
636 attach(new, skb->nfct);
/* Boot-time setup: initialise every list head in nf_hooks, i.e. one empty
 * list for each of the NPROTO x NF_MAX_HOOKS (family, hook) pairs.
 * NOTE(review): the declarations of i and h and anything after the loops
 * are not visible in this listing. */
640 void __init netfilter_init(void)
644 for (i = 0; i < NPROTO; i++) {
645 for (h = 0; h < NF_MAX_HOOKS; h++)
646 INIT_LIST_HEAD(&nf_hooks[i][h]);