1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
32 #include <linux/stddef.h>
33 #include <linux/sysctl.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/jhash.h>
37 /* For ERR_PTR(). Yeah, I know... --RR */
40 /* This rwlock protects the main hash table, protocol/helper/expected
41 registrations, conntrack timers*/
/* Lock-held assertions used by listhelp.h list walkers; compile to
 * rwlock-debug checks against ip_conntrack_lock. */
42 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
43 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
45 #include <linux/netfilter_ipv4/ip_conntrack.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
47 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
48 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
49 #include <linux/netfilter_ipv4/listhelp.h>
51 #define IP_CONNTRACK_VERSION "2.1"
/* Debug printout macro; expands to nothing in this (non-debug) build. */
56 #define DEBUGP(format, args...)
/* ip_conntrack_lock guards the hash table and protocol/helper/expect
 * registrations; the tuple lock additionally guards expectation tuples
 * against concurrent readers (see expect_cmp / change_expect). */
59 DECLARE_RWLOCK(ip_conntrack_lock);
60 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* Optional callback fired from destroy_conntrack (used by NAT layer). */
62 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
63 LIST_HEAD(ip_conntrack_expect_list);
64 LIST_HEAD(protocol_list);
65 static LIST_HEAD(helpers);
66 unsigned int ip_conntrack_htable_size = 0;
/* On Broadcom MIPS builds ip_conntrack_max is exported (non-static);
 * NOTE(review): the #else/#endif lines are elided in this extract. */
67 #ifdef CONFIG_MIPS_BRCM
68 int ip_conntrack_max=0;
70 static int ip_conntrack_max=0;
/* Current number of live conntrack entries (compared to the max above). */
72 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
73 struct list_head *ip_conntrack_hash;
74 static kmem_cache_t *ip_conntrack_cachep;
/* Single shared entry, presumably for NOTRACK-ed packets — confirm
 * against full source. */
75 struct ip_conntrack ip_conntrack_untracked;
77 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
/* LIST_FIND callback: true when this registered protocol handler
 * matches the protocol number being searched for.
 * NOTE(review): parameter tail and braces are elided in this extract. */
79 static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
82 return protocol == curr->proto;
/* Look up the protocol handler for a protocol number; caller must hold
 * the read lock.  Falls back to the generic handler when no specific
 * one is registered (the NULL-check guard line is elided here). */
85 struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
87 struct ip_conntrack_protocol *p;
89 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
90 p = LIST_FIND(&protocol_list, proto_cmpfn,
91 struct ip_conntrack_protocol *, protocol);
93 p = &ip_conntrack_generic_protocol;
/* Locking wrapper around __ip_ct_find_proto: takes the read lock for
 * the duration of the lookup. */
98 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
100 struct ip_conntrack_protocol *p;
102 READ_LOCK(&ip_conntrack_lock);
103 p = __ip_ct_find_proto(protocol);
104 READ_UNLOCK(&ip_conntrack_lock);
/* Drop one reference on a conntrack.  Goes via infos[0] because
 * nf_conntrack_put takes an nf_ct_info, not the conntrack itself. */
109 ip_conntrack_put(struct ip_conntrack *ct)
112 IP_NF_ASSERT(ct->infos[0].master);
113 /* nf_conntrack_put wants to go via an info struct, so feed it
115 nf_conntrack_put(&ct->infos[0]);
/* Random key for the tuple hash, initialised lazily (see
 * init_conntrack) to make the hash unpredictable to attackers. */
118 static int ip_conntrack_hash_rnd_initted;
119 static unsigned int ip_conntrack_hash_rnd;
/* Hash a tuple into a bucket index [0, htable_size) via keyed jhash. */
122 hash_conntrack(const struct ip_conntrack_tuple *tuple)
127 return (jhash_3words(tuple->src.ip,
128 (tuple->dst.ip ^ tuple->dst.protonum),
129 (tuple->src.u.all | (tuple->dst.u.all << 16)),
130 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
/* Fill *tuple from the IP header, then let the per-protocol handler
 * fill in the L4 parts (ports/ids) from the packet at dataoff.
 * Non-head fragments are rejected — L4 info lives in the first
 * fragment only. Returns the protocol callback's result. */
134 get_tuple(const struct iphdr *iph,
135 const struct sk_buff *skb,
136 unsigned int dataoff,
137 struct ip_conntrack_tuple *tuple,
138 const struct ip_conntrack_protocol *protocol)
141 if (iph->frag_off & htons(IP_OFFSET)) {
142 printk("ip_conntrack_core: Frag of proto %u.\n",
147 tuple->src.ip = iph->saddr;
148 tuple->dst.ip = iph->daddr;
149 tuple->dst.protonum = iph->protocol;
/* Zero the L4 union so tuple comparisons see clean state. */
150 tuple->src.u.all = tuple->dst.u.all = 0;
152 return protocol->pkt_to_tuple(skb, dataoff, tuple);
/* Build the reply-direction tuple: swap src/dst IPs, keep the protocol
 * number, and let the protocol handler invert the L4 parts. */
156 invert_tuple(struct ip_conntrack_tuple *inverse,
157 const struct ip_conntrack_tuple *orig,
158 const struct ip_conntrack_protocol *protocol)
160 inverse->src.ip = orig->dst.ip;
161 inverse->dst.ip = orig->src.ip;
162 inverse->dst.protonum = orig->dst.protonum;
/* Clean the union before the per-protocol inversion fills it. */
164 inverse->src.u.all = inverse->dst.u.all = 0;
166 return protocol->invert_tuple(inverse, orig);
170 /* ip_conntrack_expect helper functions */
172 /* Compare tuple parts depending on mask. */
/* LIST_FIND callback for the expectation list; requires the expect
 * tuple lock so the tuple cannot change mid-compare. */
173 static inline int expect_cmp(const struct ip_conntrack_expect *i,
174 const struct ip_conntrack_tuple *tuple)
176 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
177 return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
/* Final teardown of an expectation: only legal once its refcount has
 * hit zero and its timer is no longer pending (the kfree line is
 * elided in this extract). */
181 destroy_expect(struct ip_conntrack_expect *exp)
183 DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
184 IP_NF_ASSERT(atomic_read(&exp->use) == 0);
185 IP_NF_ASSERT(!timer_pending(&exp->timeout));
/* Drop one reference on an expectation; destroys it when the count
 * reaches zero (destroy_expect call elided in this extract). */
190 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
194 if (atomic_dec_and_test(&exp->use)) {
195 /* usage count dropped to zero */
/* Lookup in the global expectation list by tuple.  Caller must hold
 * BOTH read locks: the main lock (list stability) and the tuple lock
 * (expect_cmp reads tuples). */
200 static inline struct ip_conntrack_expect *
201 __ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
203 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
204 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
205 return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
206 struct ip_conntrack_expect *, tuple);
209 /* Find an expectation corresponding to a tuple. */
/* Takes both read locks, looks the expectation up, and — if found —
 * bumps its refcount before unlocking so the caller owns a reference
 * (the NULL-check guard around atomic_inc is elided here). */
210 struct ip_conntrack_expect *
211 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
213 struct ip_conntrack_expect *exp;
215 READ_LOCK(&ip_conntrack_lock);
216 READ_LOCK(&ip_conntrack_expect_tuple_lock);
217 exp = __ip_ct_expect_find(tuple);
219 atomic_inc(&exp->use);
220 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
221 READ_UNLOCK(&ip_conntrack_lock);
226 /* remove one specific expectation from all lists and drop refcount,
227 * does _NOT_ delete the timer. */
/* Caller holds the write lock; unlinks the expectation from the global
 * list and its master's sibling list, then drops one reference. */
228 static void __unexpect_related(struct ip_conntrack_expect *expect)
230 DEBUGP("unexpect_related(%p)\n", expect);
231 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
233 /* we're not allowed to unexpect a confirmed expectation! */
234 IP_NF_ASSERT(!expect->sibling);
236 /* delete from global and local lists */
237 list_del(&expect->list);
238 list_del(&expect->expected_list);
240 /* decrement expect-count of master conntrack */
241 if (expect->expectant)
242 expect->expectant->expecting--;
244 ip_conntrack_expect_put(expect);
247 /* remove one specific expectation from all lists, drop refcount
249 * This function can _NOT_ be called for confirmed expects! */
/* Like __unexpect_related but also cancels the expectation timer.
 * Losing the del_timer race means the timeout handler is already
 * running and will do the unexpect itself — so we bail (the early
 * return between the if and the call is elided in this extract). */
250 static void unexpect_related(struct ip_conntrack_expect *expect)
252 IP_NF_ASSERT(expect->expectant);
253 IP_NF_ASSERT(expect->expectant->helper);
254 /* if we are supposed to have a timer, but we can't delete
255 * it: race condition. __unexpect_related will
256 * be called by timeout function */
257 if (expect->expectant->helper->timeout
258 && !del_timer(&expect->timeout))
261 __unexpect_related(expect);
264 /* delete all unconfirmed expectations for this conntrack */
/* Walks ct's sibling list with the _safe iterator (entries are removed
 * while walking).  Established expectations (exp->sibling set) are
 * skipped but have their parent pointer severed; unconfirmed ones are
 * fully unexpected.  NOTE(review): the branch structure between the
 * skip path and the unexpect path is elided in this extract. */
265 static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
267 struct list_head *exp_entry, *next;
268 struct ip_conntrack_expect *exp;
270 DEBUGP("remove_expectations(%p)\n", ct);
272 list_for_each_safe(exp_entry, next, &ct->sibling_list) {
273 exp = list_entry(exp_entry, struct ip_conntrack_expect,
276 /* we skip established expectations, as we want to delete
277 * the un-established ones only */
279 DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
281 /* Indicate that this expectation's parent is dead */
282 ip_conntrack_put(exp->expectant);
283 exp->expectant = NULL;
288 IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
289 IP_NF_ASSERT(exp->expectant == ct);
291 /* delete expectation from global and private lists */
292 unexpect_related(exp);
/* Unlink both directions of a conntrack from the hash table and kill
 * its pending expectations.  Caller must hold the write lock. */
297 clean_from_lists(struct ip_conntrack *ct)
301 DEBUGP("clean_from_lists(%p)\n", ct);
302 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
304 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
305 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
306 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
307 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
309 /* Destroy all un-established, pending expectations */
310 remove_expectations(ct, 1);
/* Final destructor, installed as ct_general.destroy and called when
 * the refcount hits zero: runs protocol/module destroy hooks, detaches
 * the master expectation (dropping the master's reference), and frees
 * the entry back to the slab cache. */
314 destroy_conntrack(struct nf_conntrack *nfct)
316 struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
317 struct ip_conntrack_protocol *proto;
319 DEBUGP("destroy_conntrack(%p)\n", ct);
320 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
321 IP_NF_ASSERT(!timer_pending(&ct->timeout));
323 /* To make sure we don't get any weird locking issues here:
324 * destroy_conntrack() MUST NOT be called with a write lock
325 * to ip_conntrack_lock!!! -HW */
326 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
327 if (proto && proto->destroy)
330 if (ip_conntrack_destroyed)
331 ip_conntrack_destroyed(ct);
333 WRITE_LOCK(&ip_conntrack_lock);
334 /* Make sure we don't leave any orphaned expectations lying around */
336 remove_expectations(ct, 1);
338 /* Delete our master expectation */
/* NOTE(review): an `if (ct->master)` guard line is elided before this
 * dereference in the full source — confirm. */
340 if (ct->master->expectant) {
341 /* can't call __unexpect_related here,
342 * since it would screw up expect_list */
343 list_del(&ct->master->expected_list);
344 master = ct->master->expectant;
348 WRITE_UNLOCK(&ip_conntrack_lock);
/* Drop the reference we held on the master conntrack, if any. */
351 ip_conntrack_put(master);
353 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
354 kmem_cache_free(ip_conntrack_cachep, ct);
355 atomic_dec(&ip_conntrack_count);
/* Timer callback: the conntrack's timeout fired.  Unlink it from all
 * lists under the write lock, then drop the hash table's reference
 * (which typically triggers destroy_conntrack). */
358 static void death_by_timeout(unsigned long ul_conntrack)
360 struct ip_conntrack *ct = (void *)ul_conntrack;
362 WRITE_LOCK(&ip_conntrack_lock);
363 clean_from_lists(ct);
364 WRITE_UNLOCK(&ip_conntrack_lock);
365 ip_conntrack_put(ct);
/* LIST_FIND callback for hash-bucket lookups: exact tuple match, with
 * one conntrack optionally excluded (used e.g. when checking whether a
 * NAT-chosen reply tuple would collide with someone else). */
369 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
370 const struct ip_conntrack_tuple *tuple,
371 const struct ip_conntrack *ignored_conntrack)
373 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
374 return i->ctrack != ignored_conntrack
375 && ip_ct_tuple_equal(tuple, &i->tuple);
/* Hash-table lookup by tuple; caller must hold the read lock.  Returns
 * the tuple_hash entry or NULL (return statement elided here). */
378 static struct ip_conntrack_tuple_hash *
379 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
380 const struct ip_conntrack *ignored_conntrack)
382 struct ip_conntrack_tuple_hash *h;
383 unsigned int hash = hash_conntrack(tuple);
385 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
386 h = LIST_FIND(&ip_conntrack_hash[hash],
388 struct ip_conntrack_tuple_hash *,
389 tuple, ignored_conntrack);
393 /* Find a connection corresponding to a tuple. */
/* Locking wrapper: on a hit, bumps the conntrack refcount before
 * unlocking so the caller owns a reference (the NULL-check guard
 * around atomic_inc is elided in this extract). */
394 struct ip_conntrack_tuple_hash *
395 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
396 const struct ip_conntrack *ignored_conntrack)
398 struct ip_conntrack_tuple_hash *h;
400 READ_LOCK(&ip_conntrack_lock);
401 h = __ip_conntrack_find(tuple, ignored_conntrack);
403 atomic_inc(&h->ctrack->ct_general.use);
404 READ_UNLOCK(&ip_conntrack_lock);
/* Recover the conntrack and the ctinfo from an nf_ct_info pointer:
 * nfct->master points at the conntrack, and the info's offset within
 * ct->infos[] encodes the ip_conntrack_info value. */
409 static inline struct ip_conntrack *
410 __ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
412 struct ip_conntrack *ct
413 = (struct ip_conntrack *)nfct->master;
415 /* ctinfo is the index of the nfct inside the conntrack */
416 *ctinfo = nfct - ct->infos;
417 IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
421 /* Return conntrack and conntrack_info given skb->nfct->master */
/* Public accessor; a NULL skb->nfct guard is elided in this extract. */
422 struct ip_conntrack *
423 ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
426 return __ip_conntrack_get(skb->nfct, ctinfo);
430 /* Confirm a connection given skb->nfct; places it in hash table */
/* Called once the packet has traversed the hooks: insert both tuple
 * directions into the hash unless another CPU raced us in, start the
 * timeout timer, take the hash table's reference, and mark CONFIRMED. */
432 __ip_conntrack_confirm(struct nf_ct_info *nfct)
434 unsigned int hash, repl_hash;
435 struct ip_conntrack *ct;
436 enum ip_conntrack_info ctinfo;
438 ct = __ip_conntrack_get(nfct, &ctinfo);
440 /* ipt_REJECT uses ip_conntrack_attach to attach related
441 ICMP/TCP RST packets in other direction. Actual packet
442 which created connection will be IP_CT_NEW or for an
443 expected connection, IP_CT_RELATED. */
444 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
447 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
448 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
450 /* We're not in hash table, and we refuse to set up related
451 connections for unconfirmed conns. But packet copies and
452 REJECT will give spurious warnings here. */
453 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
455 /* No external references means no one else could have
457 IP_NF_ASSERT(!is_confirmed(ct));
458 DEBUGP("Confirming conntrack %p\n", ct);
460 WRITE_LOCK(&ip_conntrack_lock);
461 /* See if there's one in the list already, including reverse:
462 NAT could have grabbed it without realizing, since we're
463 not in the hash. If there is, we lost race. */
464 if (!LIST_FIND(&ip_conntrack_hash[hash],
466 struct ip_conntrack_tuple_hash *,
467 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
468 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
470 struct ip_conntrack_tuple_hash *,
471 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
472 list_prepend(&ip_conntrack_hash[hash],
473 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
474 list_prepend(&ip_conntrack_hash[repl_hash],
475 &ct->tuplehash[IP_CT_DIR_REPLY]);
476 /* Timer relative to confirmation time, not original
477 setting time, otherwise we'd get timer wrap in
478 weird delay cases. */
479 ct->timeout.expires += jiffies;
480 add_timer(&ct->timeout);
/* This reference belongs to the hash table; released by
 * death_by_timeout / clean_from_lists path. */
481 atomic_inc(&ct->ct_general.use);
482 set_bit(IPS_CONFIRMED_BIT, &ct->status);
483 WRITE_UNLOCK(&ip_conntrack_lock);
/* Lost the race: unlock and fail (return lines elided here). */
487 WRITE_UNLOCK(&ip_conntrack_lock);
491 /* Returns true if a connection corresponds to the tuple (required
/* Read-locked existence check, excluding ignored_conntrack; used by
 * NAT to test whether a candidate tuple is already in use. */
494 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
495 const struct ip_conntrack *ignored_conntrack)
497 struct ip_conntrack_tuple_hash *h;
499 READ_LOCK(&ip_conntrack_lock);
500 h = __ip_conntrack_find(tuple, ignored_conntrack);
501 READ_UNLOCK(&ip_conntrack_lock);
506 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
/* Handle ICMP error messages: extract the embedded (original) packet's
 * tuple, look up the connection it refers to, and attach this skb to
 * that connection as RELATED traffic. */
507 struct ip_conntrack *
508 icmp_error_track(struct sk_buff *skb,
509 enum ip_conntrack_info *ctinfo,
510 unsigned int hooknum)
512 struct ip_conntrack_tuple innertuple, origtuple;
517 struct ip_conntrack_protocol *innerproto;
518 struct ip_conntrack_tuple_hash *h;
521 IP_NF_ASSERT(skb->nfct == NULL);
523 /* Not enough header? */
524 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
/* Only these ICMP types carry an embedded packet we can track. */
527 if (inside.icmp.type != ICMP_DEST_UNREACH
528 && inside.icmp.type != ICMP_SOURCE_QUENCH
529 && inside.icmp.type != ICMP_TIME_EXCEEDED
530 && inside.icmp.type != ICMP_PARAMETERPROB
531 && inside.icmp.type != ICMP_REDIRECT)
534 /* Ignore ICMP's containing fragments (shouldn't happen) */
535 if (inside.ip.frag_off & htons(IP_OFFSET)) {
536 DEBUGP("icmp_error_track: fragment of proto %u\n",
541 innerproto = ip_ct_find_proto(inside.ip.protocol);
542 dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
543 /* Are they talking about one of our connections? */
544 if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
545 DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
549 /* Ordinarily, we'd expect the inverted tupleproto, but it's
550 been preserved inside the ICMP. */
551 if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
552 DEBUGP("icmp_error_track: Can't invert tuple\n");
556 *ctinfo = IP_CT_RELATED;
558 h = ip_conntrack_find_get(&innertuple, NULL);
560 /* Locally generated ICMPs will match inverted if they
561 haven't been SNAT'ed yet */
562 /* FIXME: NAT code has to handle half-done double NAT --RR */
563 if (hooknum == NF_IP_LOCAL_OUT)
564 h = ip_conntrack_find_get(&origtuple, NULL);
567 DEBUGP("icmp_error_track: no match\n");
570 /* Reverse direction from that found */
/* NOTE(review): the two DIRECTION checks below appear to belong to
 * different if/else arms (inverted-lookup vs. direct-lookup paths)
 * whose enclosing braces are elided in this extract — confirm against
 * the full source before concluding they are redundant. */
571 if (DIRECTION(h) != IP_CT_DIR_REPLY)
572 *ctinfo += IP_CT_IS_REPLY;
574 if (DIRECTION(h) == IP_CT_DIR_REPLY)
575 *ctinfo += IP_CT_IS_REPLY;
578 /* Update skb to refer to this connection */
579 skb->nfct = &h->ctrack->infos[*ctinfo];
583 /* There's a small race here where we may free a just-assured
584 connection. Too bad: we're in trouble anyway. */
/* Drop-candidate predicate: true for connections not yet ASSURED. */
585 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
587 return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
/* Table-full relief: pick the oldest un-assured entry in this hash
 * chain and evict it by firing its timeout early.  Returns whether an
 * entry was dropped (ret handling lines elided in this extract). */
590 static int early_drop(struct list_head *chain)
592 /* Traverse backwards: gives us oldest, which is roughly LRU */
593 struct ip_conntrack_tuple_hash *h;
596 READ_LOCK(&ip_conntrack_lock);
597 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
/* Hold a reference across the unlock so the entry can't vanish. */
599 atomic_inc(&h->ctrack->ct_general.use);
600 READ_UNLOCK(&ip_conntrack_lock);
/* Only the del_timer winner may run the teardown. */
605 if (del_timer(&h->ctrack->timeout)) {
606 death_by_timeout((unsigned long)h->ctrack);
609 ip_conntrack_put(h->ctrack);
/* Broadcom-only fallback: when even early_drop fails, evict the oldest
 * entry unconditionally (predicate matches everything). */
613 #if defined(CONFIG_MIPS_BRCM)
614 static inline int regardless(const struct ip_conntrack_tuple_hash *i)
/* Same shape as early_drop, but uses the always-true predicate so an
 * assured connection can be sacrificed too.  NOTE(review): the #endif
 * and return handling are elided in this extract. */
619 static int regardless_drop(struct list_head *chain)
621 /* Traverse backwards: gives us oldest, which is roughly LRU */
622 struct ip_conntrack_tuple_hash *h;
625 READ_LOCK(&ip_conntrack_lock);
626 h = LIST_FIND_B(chain, regardless, struct ip_conntrack_tuple_hash *);
628 atomic_inc(&h->ctrack->ct_general.use);
629 READ_UNLOCK(&ip_conntrack_lock);
634 if (del_timer(&h->ctrack->timeout)) {
635 death_by_timeout((unsigned long)h->ctrack);
638 ip_conntrack_put(h->ctrack);
/* LIST_FIND callback: does this registered helper's masked tuple match
 * the given reply tuple? */
643 static inline int helper_cmp(const struct ip_conntrack_helper *i,
644 const struct ip_conntrack_tuple *rtuple)
646 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
/* Find the conntrack helper registered for this tuple, or NULL.
 * NOTE(review): the final argument line and braces are elided; no
 * explicit locking visible here — presumably callers hold
 * ip_conntrack_lock (confirm against full source). */
649 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
651 return LIST_FIND(&helpers, helper_cmp,
652 struct ip_conntrack_helper *,
656 /* Allocate a new conntrack: we return -ENOMEM if classification
657 failed due to stress. Otherwise it really is unclassifiable. */
/* Create a new (unconfirmed) conntrack for `tuple`: enforce the table
 * limit (with early_drop / Broadcom regardless_drop eviction), build
 * both tuple directions, run the protocol's new() hook, attach any
 * matching expectation, and pick a helper. */
658 static struct ip_conntrack_tuple_hash *
659 init_conntrack(const struct ip_conntrack_tuple *tuple,
660 struct ip_conntrack_protocol *protocol,
663 struct ip_conntrack *conntrack;
664 struct ip_conntrack_tuple repl_tuple;
666 struct ip_conntrack_expect *expected;
668 static unsigned int drop_next;
/* Lazily seed the hash key on first use. */
670 if (!ip_conntrack_hash_rnd_initted) {
671 get_random_bytes(&ip_conntrack_hash_rnd, 4);
672 ip_conntrack_hash_rnd_initted = 1;
675 hash = hash_conntrack(tuple);
677 if (ip_conntrack_max &&
678 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
679 /* Try dropping from random chain, or else from the
680 chain about to put into (in case they're trying to
681 bomb one hash chain). */
682 unsigned int next = (drop_next++)%ip_conntrack_htable_size;
684 if (!early_drop(&ip_conntrack_hash[next])
685 && !early_drop(&ip_conntrack_hash[hash])) {
686 #if defined(CONFIG_MIPS_BRCM)
687 /* Sorry, we have to kick one out regardless. */
688 while (!regardless_drop(&ip_conntrack_hash[next]))
689 next = (drop_next++)%ip_conntrack_htable_size;
693 "ip_conntrack: table full, dropping"
695 return ERR_PTR(-ENOMEM);
700 if (!invert_tuple(&repl_tuple, tuple, protocol)) {
701 DEBUGP("Can't invert tuple.\n");
705 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
707 DEBUGP("Can't allocate conntrack.\n");
708 return ERR_PTR(-ENOMEM);
711 memset(conntrack, 0, sizeof(*conntrack));
/* Initial reference belongs to the caller (confirmed later). */
712 atomic_set(&conntrack->ct_general.use, 1);
713 conntrack->ct_general.destroy = destroy_conntrack;
714 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
715 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
716 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
717 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
/* Every infos[] slot points back to the refcounted core. */
718 for (i=0; i < IP_CT_NUMBER; i++)
719 conntrack->infos[i].master = &conntrack->ct_general;
721 if (!protocol->new(conntrack, skb)) {
722 kmem_cache_free(ip_conntrack_cachep, conntrack);
725 /* Don't set timer yet: wait for confirmation */
726 init_timer(&conntrack->timeout);
727 conntrack->timeout.data = (unsigned long)conntrack;
728 conntrack->timeout.function = death_by_timeout;
730 INIT_LIST_HEAD(&conntrack->sibling_list);
732 WRITE_LOCK(&ip_conntrack_lock);
733 /* Need finding and deleting of expected ONLY if we win race */
734 READ_LOCK(&ip_conntrack_expect_tuple_lock);
735 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
736 struct ip_conntrack_expect *, tuple);
737 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
739 /* If master is not in hash table yet (ie. packet hasn't left
740 this machine yet), how can other end know about expected?
741 Hence these are not the droids you are looking for (if
742 master ct never got confirmed, we'd hold a reference to it
743 and weird things would happen to future packets). */
744 if (expected && !is_confirmed(expected->expectant))
747 /* Look up the conntrack helper for master connections only */
749 conntrack->helper = ip_ct_find_helper(&repl_tuple);
751 /* If the expectation is dying, then this is a loser. */
753 && expected->expectant->helper->timeout
754 && ! del_timer(&expected->timeout))
758 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
759 conntrack, expected);
760 /* Welcome, Mr. Bond. We've been expecting you... */
761 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
762 conntrack->master = expected;
763 expected->sibling = conntrack;
/* NOTE(review): mainline uses `#ifdef CONFIG_IP_NF_CONNTRACK_MARK`
 * here; plain #if works when the config macro is defined as 1 but
 * warns under -Wundef when unset — worth fixing upstream. */
764 #if CONFIG_IP_NF_CONNTRACK_MARK
765 conntrack->mark = expected->expectant->mark;
767 LIST_DELETE(&ip_conntrack_expect_list, expected);
768 expected->expectant->expecting--;
/* Take a reference on the master so it outlives this child. */
769 nf_conntrack_get(&master_ct(conntrack)->infos[0]);
771 atomic_inc(&ip_conntrack_count);
772 WRITE_UNLOCK(&ip_conntrack_lock);
/* Run the expectation callback (e.g. NAT setup) outside the lock. */
774 if (expected && expected->expectfn)
775 expected->expectfn(conntrack);
776 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
779 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/* Map a packet to its conntrack: build the tuple, look it up (creating
 * a new entry via init_conntrack on miss), then classify the packet as
 * ESTABLISHED / RELATED / NEW, with IS_REPLY added for the reply
 * direction. */
780 static inline struct ip_conntrack *
781 resolve_normal_ct(struct sk_buff *skb,
782 struct ip_conntrack_protocol *proto,
784 unsigned int hooknum,
785 enum ip_conntrack_info *ctinfo)
787 struct ip_conntrack_tuple tuple;
788 struct ip_conntrack_tuple_hash *h;
/* Fragments were handled/rejected before we get here. */
790 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
792 if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
795 /* look for tuple match */
796 h = ip_conntrack_find_get(&tuple, NULL);
798 h = init_conntrack(&tuple, proto, skb);
805 /* It exists; we have (non-exclusive) reference. */
806 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
807 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
808 /* Please set reply bit if this packet OK */
811 /* Once we've had two way comms, always ESTABLISHED. */
812 if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
813 DEBUGP("ip_conntrack_in: normal packet for %p\n",
815 *ctinfo = IP_CT_ESTABLISHED;
816 } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
817 DEBUGP("ip_conntrack_in: related packet for %p\n",
819 *ctinfo = IP_CT_RELATED;
821 DEBUGP("ip_conntrack_in: new packet for %p\n",
/* Attach the connection to the skb via the ctinfo-indexed slot. */
827 skb->nfct = &h->ctrack->infos[*ctinfo];
831 /* Netfilter hook itself. */
/* Main conntrack hook: skip fragments/broadcasts/already-tracked skbs,
 * resolve or create the connection, run the protocol state machine and
 * any helper, and set SEEN_REPLY when a reply is accepted. */
832 unsigned int ip_conntrack_in(unsigned int hooknum,
833 struct sk_buff **pskb,
834 const struct net_device *in,
835 const struct net_device *out,
836 int (*okfn)(struct sk_buff *))
838 struct ip_conntrack *ct;
839 enum ip_conntrack_info ctinfo;
840 struct ip_conntrack_protocol *proto;
/* Non-head fragments should have been reassembled upstream. */
845 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
847 if (net_ratelimit()) {
848 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
849 (*pskb)->nh.iph->protocol, hooknum);
854 /* FIXME: Do this right please. --RR */
855 (*pskb)->nfcache |= NFC_UNKNOWN;
857 /* Doesn't cover locally-generated broadcast, so not worth it. */
859 /* Ignore broadcast: no `connection'. */
860 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
861 printk("Broadcast packet!\n");
863 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
864 == htonl(0x000000FF)) {
865 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
866 NIPQUAD((*pskb)->nh.iph->saddr),
867 NIPQUAD((*pskb)->nh.iph->daddr),
868 (*pskb)->sk, (*pskb)->pkt_type);
872 /* Previously seen (loopback or untracked)? Ignore. */
876 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
878 /* It may be an icmp error... */
879 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
880 && icmp_error_track(*pskb, &ctinfo, hooknum))
883 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
884 /* Not valid part of a connection */
888 /* Too stressed to deal. */
891 IP_NF_ASSERT((*pskb)->nfct);
/* Protocol state machine; a negative verdict path (elided here)
 * detaches the conntrack from the skb below. */
893 ret = proto->packet(ct, *pskb, ctinfo);
896 nf_conntrack_put((*pskb)->nfct);
897 (*pskb)->nfct = NULL;
901 if (ret != NF_DROP && ct->helper) {
902 ret = ct->helper->help(*pskb, ct, ctinfo);
905 nf_conntrack_put((*pskb)->nfct);
906 (*pskb)->nfct = NULL;
/* First accepted reply-direction packet flips SEEN_REPLY. */
911 set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
/* Convenience wrapper: invert a tuple, looking up the protocol handler
 * from the tuple's own protocol number. */
916 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
917 const struct ip_conntrack_tuple *orig)
919 return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
/* LIST_FIND callback: does this existing expectation correspond to a
 * re-sent packet?  Matches against ct_tuple when the expectation was
 * already NAT-rewritten (protonum != 0), else against the plain tuple;
 * masks must match exactly in either case. */
922 static inline int resent_expect(const struct ip_conntrack_expect *i,
923 const struct ip_conntrack_tuple *tuple,
924 const struct ip_conntrack_tuple *mask)
926 DEBUGP("resent_expect\n");
927 DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
928 DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
929 DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
930 return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
931 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
932 && ip_ct_tuple_equal(&i->mask, mask));
935 /* Would two expected things clash? */
/* Clash test: intersect the two masks and compare the tuples under the
 * intersection — equal means both expectations could match the same
 * future packet. */
936 static inline int expect_clash(const struct ip_conntrack_expect *i,
937 const struct ip_conntrack_tuple *tuple,
938 const struct ip_conntrack_tuple *mask)
940 /* Part covered by intersection of masks must be unequal,
941 otherwise they clash */
942 struct ip_conntrack_tuple intersect_mask
943 = { { i->mask.src.ip & mask->src.ip,
944 { i->mask.src.u.all & mask->src.u.all } },
945 { i->mask.dst.ip & mask->dst.ip,
946 { i->mask.dst.u.all & mask->dst.u.all },
947 i->mask.dst.protonum & mask->dst.protonum } };
949 return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
/* Public entry point: remove an (unconfirmed) expectation, taking the
 * write lock that unexpect_related requires. */
952 inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
954 WRITE_LOCK(&ip_conntrack_lock);
955 unexpect_related(expect);
956 WRITE_UNLOCK(&ip_conntrack_lock);
/* Expectation-timer callback: the timer has already fired, so use
 * __unexpect_related (which does not touch the timer). */
959 static void expectation_timed_out(unsigned long ul_expect)
961 struct ip_conntrack_expect *expect = (void *) ul_expect;
963 DEBUGP("expectation %p timed out\n", expect);
964 WRITE_LOCK(&ip_conntrack_lock);
965 __unexpect_related(expect);
966 WRITE_UNLOCK(&ip_conntrack_lock);
/* Allocate a zeroed expectation (GFP_ATOMIC: may be called from packet
 * processing context).  Returns NULL on allocation failure (the guard
 * and return lines are elided in this extract). */
969 struct ip_conntrack_expect *
970 ip_conntrack_expect_alloc(void)
972 struct ip_conntrack_expect *new;
974 new = (struct ip_conntrack_expect *)
975 kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
977 DEBUGP("expect_related: OOM allocating expect\n");
981 /* tuple_cmp compares whole union, we have to initialize cleanly */
982 memset(new, 0, sizeof(struct ip_conntrack_expect));
/* Link a freshly built expectation to its master conntrack and the
 * global expectation list, arming the expiry timer if the helper
 * defines a timeout.  Caller holds the write lock (see callers). */
988 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
989 struct ip_conntrack *related_to)
991 DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
992 new->expectant = related_to;
/* Initial reference owned by the lists; dropped in __unexpect_related. */
994 atomic_set(&new->use, 1);
996 /* add to expected list for this connection */
997 list_add_tail(&new->expected_list, &related_to->sibling_list);
998 /* add to global list of expectations */
999 list_prepend(&ip_conntrack_expect_list, &new->list);
1000 /* add and start timer if required */
1001 if (related_to->helper->timeout) {
1002 init_timer(&new->timeout);
1003 new->timeout.data = (unsigned long)new;
1004 new->timeout.function = expectation_timed_out;
1005 new->timeout.expires = jiffies +
1006 related_to->helper->timeout * HZ;
1007 add_timer(&new->timeout);
1009 related_to->expecting++;
1012 /* Add a related connection. */
/* Register an expectation on behalf of a helper.  Handles three cases
 * before inserting: (1) a re-sent packet refreshes the existing
 * expectation's timer; (2) the helper's max_expected cap either
 * rejects the request or evicts the oldest unconfirmed expectation
 * (REUSE_EXPECT); (3) a clash with an unrelated expectation fails. */
1013 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
1014 struct ip_conntrack *related_to)
1016 struct ip_conntrack_expect *old;
1019 WRITE_LOCK(&ip_conntrack_lock);
1020 /* Because of the write lock, no reader can walk the lists,
1021 * so there is no need to use the tuple lock too */
1023 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
1024 DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1025 DEBUGP("mask: "); DUMP_TUPLE_RAW(&expect->mask);
1027 old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
1028 struct ip_conntrack_expect *, &expect->tuple,
1031 /* Helper private data may contain offsets but no pointers
1032 pointing into the payload - otherwise we should have to copy
1033 the data filled out by the helper over the old one */
1034 DEBUGP("expect_related: resent packet\n");
1035 if (related_to->helper->timeout) {
1036 if (!del_timer(&old->timeout)) {
1037 /* expectation is dying. Fall through */
1040 old->timeout.expires = jiffies +
1041 related_to->helper->timeout * HZ;
1042 add_timer(&old->timeout);
1046 WRITE_UNLOCK(&ip_conntrack_lock);
1050 } else if (related_to->helper->max_expected &&
1051 related_to->expecting >= related_to->helper->max_expected) {
/* Cap reached: without REUSE_EXPECT we must refuse. */
1053 if (!(related_to->helper->flags &
1054 IP_CT_HELPER_F_REUSE_EXPECT)) {
1055 WRITE_UNLOCK(&ip_conntrack_lock);
1056 if (net_ratelimit())
1058 "ip_conntrack: max number of expected "
1059 "connections %i of %s reached for "
1060 "%u.%u.%u.%u->%u.%u.%u.%u\n",
1061 related_to->helper->max_expected,
1062 related_to->helper->name,
1063 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1064 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1068 DEBUGP("ip_conntrack: max number of expected "
1069 "connections %i of %s reached for "
1070 "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1071 related_to->helper->max_expected,
1072 related_to->helper->name,
1073 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1074 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1076 /* choose the oldest expectation to evict */
1077 list_for_each_entry(old, &related_to->sibling_list,
1079 if (old->sibling == NULL)
1082 /* We cannot fail since related_to->expecting is the number
1083 * of unconfirmed expectations */
1084 IP_NF_ASSERT(old && old->sibling == NULL);
1086 /* newnat14 does not reuse the real allocated memory
1087 * structures but rather unexpects the old and
1088 * allocates a new. unexpect_related will decrement
1089 * related_to->expecting.
1091 unexpect_related(old);
1093 } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1094 struct ip_conntrack_expect *, &expect->tuple,
1096 WRITE_UNLOCK(&ip_conntrack_lock);
1097 DEBUGP("expect_related: busy!\n");
1103 out: ip_conntrack_expect_insert(expect, related_to);
1105 WRITE_UNLOCK(&ip_conntrack_lock);
1110 /* Change tuple in an existing expectation */
/* NAT support: rewrite an expectation's tuple in place.  First change
 * saves the original into ct_tuple (protonum==0 means "never changed");
 * a clashing new tuple forces NAT to retry.  On a resent packet the
 * already-stored tuple is compared instead. */
1111 int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1112 struct ip_conntrack_tuple *newtuple)
1116 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1117 WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1119 DEBUGP("change_expect:\n");
1120 DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1121 DEBUGP("exp mask: "); DUMP_TUPLE_RAW(&expect->mask);
1122 DEBUGP("newtuple: "); DUMP_TUPLE_RAW(newtuple);
1123 if (expect->ct_tuple.dst.protonum == 0) {
1124 /* Never seen before */
1125 DEBUGP("change expect: never seen before\n");
1126 if (!ip_ct_tuple_mask_cmp(&expect->tuple, newtuple, &expect->mask)
1127 && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1128 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1129 /* Force NAT to find an unused tuple */
/* Remember the original tuple, then install the new one. */
1132 memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1133 memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
1138 DEBUGP("change expect: resent packet\n");
1139 if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1142 /* Force NAT to choose again the same port */
1146 WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1151 /* Alter reply tuple (maybe alter helper). If it's already taken,
1152 return 0 and don't do alteration. */
/* Used by NAT before confirmation: install a new reply-direction tuple
 * unless some other connection already owns it.  Only re-selects the
 * helper for master connections (no master, no siblings). */
1153 int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1154 const struct ip_conntrack_tuple *newreply)
1156 WRITE_LOCK(&ip_conntrack_lock);
1157 if (__ip_conntrack_find(newreply, conntrack)) {
1158 WRITE_UNLOCK(&ip_conntrack_lock);
1161 /* Should be unconfirmed, so not in hash table yet */
1162 IP_NF_ASSERT(!is_confirmed(conntrack));
1164 DEBUGP("Altering reply tuple of %p to ", conntrack);
1165 DUMP_TUPLE(newreply);
1167 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1168 if (!conntrack->master && list_empty(&conntrack->sibling_list))
1169 conntrack->helper = ip_ct_find_helper(newreply);
1170 WRITE_UNLOCK(&ip_conntrack_lock);
/* Register a conntrack helper: prepend it to the global helpers list
 * under the conntrack write lock.  (Return statement elided in listing.) */
1175 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1177 WRITE_LOCK(&ip_conntrack_lock);
1178 list_prepend(&helpers, me);
1179 WRITE_UNLOCK(&ip_conntrack_lock);
/* Detach helper `me` from the conntrack behind hash entry `i`:
 * drop its unconfirmed expectations first, then clear the helper
 * pointer.  Used as a LIST_FIND_W callback by helper_unregister. */
1184 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1185 const struct ip_conntrack_helper *me)
1187 if (i->ctrack->helper == me) {
1188 /* Get rid of any expected. */
1189 remove_expectations(i->ctrack, 0);
1190 /* And *then* set helper to NULL */
1191 i->ctrack->helper = NULL;
1196 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1200 /* Need write lock here, to delete helper. */
1201 WRITE_LOCK(&ip_conntrack_lock);
1202 LIST_DELETE(&helpers, me);
1204 /* Get rid of expecteds, set helpers to NULL. */
/* Walk every hash bucket and run unhelp() on each tuple-hash entry. */
1205 for (i = 0; i < ip_conntrack_htable_size; i++)
1206 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1207 struct ip_conntrack_tuple_hash *, me);
1208 WRITE_UNLOCK(&ip_conntrack_lock);
1210 /* Someone could be still looking at the helper in a bh. */
1214 /* Refresh conntrack for this many jiffies. */
1215 void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
1217 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1219 /* If not in hash table, timer will not be active yet */
1220 if (!is_confirmed(ct))
/* Unconfirmed: store the relative timeout; the timer is armed later
 * when the conntrack gets confirmed. */
1221 ct->timeout.expires = extra_jiffies;
1223 WRITE_LOCK(&ip_conntrack_lock);
1224 /* Need del_timer for race avoidance (may already be dying). */
/* Re-arm only if del_timer() won the race; if the timer already fired,
 * the conntrack is on its way out and must not be resurrected. */
1225 if (del_timer(&ct->timeout)) {
1226 ct->timeout.expires = jiffies + extra_jiffies;
1227 add_timer(&ct->timeout);
1229 WRITE_UNLOCK(&ip_conntrack_lock);
1233 /* Returns new sk_buff, or NULL */
1235 ip_ct_gather_frags(struct sk_buff *skb)
/* Save the owning socket (and, with CONFIG_NETFILTER_DEBUG, the debug
 * bits) so they can be restored on the reassembled skb, since
 * ip_defrag() may return a different sk_buff. */
1237 struct sock *sk = skb->sk;
1238 #ifdef CONFIG_NETFILTER_DEBUG
1239 unsigned int olddebug = skb->nf_debug;
1247 skb = ip_defrag(skb);
1257 skb_set_owner_w(skb, sk);
/* Recompute the IP header checksum and flag the skb as altered so the
 * rest of netfilter knows defragmentation happened. */
1261 ip_send_check(skb->nh.iph);
1262 skb->nfcache |= NFC_ALTERED;
1263 #ifdef CONFIG_NETFILTER_DEBUG
1264 /* Packet path as if nothing had happened. */
1265 skb->nf_debug = olddebug;
1270 /* Used by ipt_REJECT. */
/* Attach the conntrack referenced by nfct to a freshly built skb
 * (e.g. an ICMP error / TCP RST generated by ipt_REJECT). */
1271 static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1273 struct ip_conntrack *ct;
1274 enum ip_conntrack_info ctinfo;
1276 ct = __ip_conntrack_get(nfct, &ctinfo);
1278 /* This ICMP is in reverse direction to the packet which
/* The generated packet travels opposite to the original, so flip the
 * direction when deriving the RELATED ctinfo. */
1280 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1281 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1283 ctinfo = IP_CT_RELATED;
1285 /* Attach new skbuff, and increment count */
1286 nskb->nfct = &ct->infos[ctinfo];
1287 atomic_inc(&ct->ct_general.use);
/* LIST_FIND trampoline: apply the caller-supplied predicate to the
 * conntrack behind hash entry `i`.  (Return type line elided in listing.) */
1291 do_kill(const struct ip_conntrack_tuple_hash *i,
1292 int (*kill)(const struct ip_conntrack *i, void *data),
1295 return kill(i->ctrack, data);
1298 /* Bring out ya dead! */
/* Scan hash buckets starting at *bucket for the first conntrack matching
 * kill(); take a reference on it under the read lock before returning.
 * *bucket is advanced so repeated calls resume where the last one left off. */
1299 static struct ip_conntrack_tuple_hash *
1300 get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1301 void *data, unsigned int *bucket)
1303 struct ip_conntrack_tuple_hash *h = NULL;
1305 READ_LOCK(&ip_conntrack_lock);
1306 for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
1307 h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
1308 struct ip_conntrack_tuple_hash *, kill, data);
/* Pin the match so it cannot be freed after the lock is dropped. */
1311 atomic_inc(&h->ctrack->ct_general.use);
1312 READ_UNLOCK(&ip_conntrack_lock);
/* Kill every conntrack for which kill(ct, data) matches: delete its
 * timer and run the timeout handler immediately, then drop the
 * reference taken by get_next_corpse(). */
1318 ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1321 struct ip_conntrack_tuple_hash *h;
1322 unsigned int bucket = 0;
1324 while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
1325 /* Time to push up daises... */
1326 if (del_timer(&h->ctrack->timeout))
1327 death_by_timeout((unsigned long)h->ctrack);
1328 /* ... else the timer will get him soon. */
1330 ip_conntrack_put(h->ctrack);
1334 /* Fast function for those who don't want to parse /proc (and I don't
1336 /* Reversing the socket's dst/src point of view gives us the reply
/* getsockopt(SO_ORIGINAL_DST) handler: look up the conntrack whose
 * reply tuple matches this TCP socket and copy the original (pre-NAT)
 * destination to userspace as a struct sockaddr_in. */
1339 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1341 struct inet_opt *inet = inet_sk(sk);
1342 struct ip_conntrack_tuple_hash *h;
1343 struct ip_conntrack_tuple tuple;
/* Build the reply-direction tuple from the socket's own addresses. */
1345 IP_CT_TUPLE_U_BLANK(&tuple);
1346 tuple.src.ip = inet->rcv_saddr;
1347 tuple.src.u.tcp.port = inet->sport;
1348 tuple.dst.ip = inet->daddr;
1349 tuple.dst.u.tcp.port = inet->dport;
1350 tuple.dst.protonum = IPPROTO_TCP;
1352 /* We only do TCP at the moment: is there a better way? */
1353 if (strcmp(sk->sk_prot->name, "TCP")) {
1354 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1355 return -ENOPROTOOPT;
/* Userspace buffer must be large enough for a sockaddr_in. */
1358 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1359 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1360 *len, sizeof(struct sockaddr_in));
1364 h = ip_conntrack_find_get(&tuple, NULL);
1366 struct sockaddr_in sin;
/* Answer with the ORIGINAL direction's destination (pre-NAT). */
1368 sin.sin_family = AF_INET;
1369 sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1370 .tuple.dst.u.tcp.port;
1371 sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1374 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1375 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
/* Drop the reference taken by ip_conntrack_find_get() before copyout. */
1376 ip_conntrack_put(h->ctrack);
1377 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1382 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1383 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1384 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
/* nf_sockopt registration exposing getorigdst() as the getsockopt
 * handler for exactly SO_ORIGINAL_DST (the .get member line is elided
 * in this listing). */
1388 static struct nf_sockopt_ops so_getorigdst = {
1390 .get_optmin = SO_ORIGINAL_DST,
1391 .get_optmax = SO_ORIGINAL_DST+1,
/* Predicate for ip_ct_selective_cleanup() used during module teardown;
 * presumably matches every conntrack (body elided in this listing —
 * TODO confirm against full source). */
1395 static int kill_all(const struct ip_conntrack *i, void *data)
1400 /* Mishearing the voices in his head, our hero wonders how he's
1401 supposed to kill the mall. */
/* Module teardown: detach the REJECT hook, flush every conntrack, then
 * free the slab cache, the hash table and the sockopt registration. */
1402 void ip_conntrack_cleanup(void)
1404 ip_ct_attach = NULL;
1405 /* This makes sure all current packets have passed through
1406 netfilter framework. Roll on, two-stage module
/* Retry the flush until the count reaches zero (the i_see_dead_people
 * label itself is elided in this listing). */
1411 ip_ct_selective_cleanup(kill_all, NULL);
1412 if (atomic_read(&ip_conntrack_count) != 0) {
1414 goto i_see_dead_people;
1417 kmem_cache_destroy(ip_conntrack_cachep);
1418 vfree(ip_conntrack_hash);
1419 nf_unregister_sockopt(&so_getorigdst);
/* Module parameter overriding the hash table size; when left 0 the size
 * is presumably computed from available memory in ip_conntrack_init()
 * (the guarding condition is elided in this listing — confirm). */
1422 static int hashsize;
1423 MODULE_PARM(hashsize, "i");
1425 int __init ip_conntrack_init(void)
1430 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1431 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1433 ip_conntrack_htable_size = hashsize;
1435 ip_conntrack_htable_size
1436 = (((num_physpages << PAGE_SHIFT) / 16384)
1437 / sizeof(struct list_head));
1438 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1439 ip_conntrack_htable_size = 8192;
1440 if (ip_conntrack_htable_size < 16)
1441 ip_conntrack_htable_size = 16;
1443 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1445 #ifdef CONFIG_MIPS_BRCM
1448 printk("ip_conntrack version %s (%u buckets, %d max)"
1449 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1450 ip_conntrack_htable_size, ip_conntrack_max,
1451 sizeof(struct ip_conntrack));
1453 ret = nf_register_sockopt(&so_getorigdst);
1455 printk(KERN_ERR "Unable to register netfilter socket option\n");
1459 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1460 * ip_conntrack_htable_size);
1461 if (!ip_conntrack_hash) {
1462 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1463 goto err_unreg_sockopt;
1466 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1467 sizeof(struct ip_conntrack), 0,
1468 SLAB_HWCACHE_ALIGN, NULL, NULL);
1469 if (!ip_conntrack_cachep) {
1470 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1473 /* Don't NEED lock here, but good form anyway. */
1474 WRITE_LOCK(&ip_conntrack_lock);
1475 /* Sew in builtin protocols. */
1476 list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1477 list_append(&protocol_list, &ip_conntrack_protocol_udp);
1478 list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1479 list_append(&protocol_list, &ip_conntrack_protocol_esp);
1480 WRITE_UNLOCK(&ip_conntrack_lock);
1482 for (i = 0; i < ip_conntrack_htable_size; i++)
1483 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1485 /* For use by ipt_REJECT */
1486 ip_ct_attach = ip_conntrack_attach;
1488 /* Set up fake conntrack:
1489 - to never be deleted, not in any hashes */
1490 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1491 /* - and look it like as a confirmed connection */
1492 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1493 /* - and prepare the ctinfo field for REJECT & NAT. */
1494 ip_conntrack_untracked.infos[IP_CT_NEW].master =
1495 ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1496 ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1497 &ip_conntrack_untracked.ct_general;
1502 vfree(ip_conntrack_hash);
1504 nf_unregister_sockopt(&so_getorigdst);