1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
20 #include <linux/config.h>
21 #include <linux/types.h>
22 #include <linux/icmp.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/module.h>
27 #include <linux/skbuff.h>
28 #include <linux/proc_fs.h>
29 #include <linux/vmalloc.h>
30 #include <net/checksum.h>
32 #include <linux/stddef.h>
33 #include <linux/sysctl.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/jhash.h>
37 /* For ERR_PTR(). Yeah, I know... --RR */
40 /* This rwlock protects the main hash table, protocol/helper/expected
41 registrations, conntrack timers*/
42 #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
43 #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
45 #include <linux/netfilter_ipv4/ip_conntrack.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
47 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
48 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
49 #include <linux/netfilter_ipv4/listhelp.h>
51 #define IP_CONNTRACK_VERSION "2.1"
56 #define DEBUGP(format, args...)
/* Global state of the connection tracker. */
/* Read/write locks: one for the tracker as a whole, one for the tuples kept
 * inside expectations. */
59 DECLARE_RWLOCK(ip_conntrack_lock);
60 DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* Optional hook; destroy_conntrack() calls it when it is non-NULL. */
62 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
/* Global expectation list, registered protocol handlers, registered helpers. */
63 LIST_HEAD(ip_conntrack_expect_list);
64 LIST_HEAD(protocol_list);
65 static LIST_HEAD(helpers);
/* Number of hash chains; hash_conntrack() reduces its hash modulo this. */
66 unsigned int ip_conntrack_htable_size = 0;
/* Limit on tracked connections tested in init_conntrack(); 0 disables the
 * test.  Under CONFIG_MIPS_BRCM the variable is not static.
 * NOTE(review): the #else and #endif lines of this conditional are not
 * visible in this listing. */
67 #ifdef CONFIG_MIPS_BRCM
68 int ip_conntrack_max=0;
70 static int ip_conntrack_max=0;
/* Number of conntracks: incremented in init_conntrack(), decremented in
 * destroy_conntrack(). */
72 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
/* Array of hash chains, indexed by the value of hash_conntrack(). */
73 struct list_head *ip_conntrack_hash;
/* Slab cache conntrack objects are allocated from and returned to. */
74 static kmem_cache_t *ip_conntrack_cachep;
/* NOTE(review): not referenced in the visible part of this listing. */
75 struct ip_conntrack ip_conntrack_untracked;
/* Handler that __ip_ct_find_proto() assigns as its fallback result. */
77 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
/* List-search predicate: nonzero when curr->proto equals protocol.
 * NOTE(review): the line declaring the protocol parameter is not visible in
 * this listing; __ip_ct_find_proto() supplies a u_int8_t for it. */
79 static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
82 return protocol == curr->proto;
/* Search protocol_list for the handler of an IP protocol number.
 * The caller must already hold ip_conntrack_lock for reading (asserted).
 * NOTE(review): the lines between the lookup and the assignment of the
 * generic handler, and the return, are not visible in this listing;
 * presumably the generic handler is used only when the lookup finds
 * nothing and p is returned -- confirm against the full file. */
85 struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
87 struct ip_conntrack_protocol *p;
89 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
90 p = LIST_FIND(&protocol_list, proto_cmpfn,
91 struct ip_conntrack_protocol *, protocol);
93 p = &ip_conntrack_generic_protocol;
/* Locking wrapper: takes ip_conntrack_lock for reading around
 * __ip_ct_find_proto().
 * NOTE(review): the return statement is not visible in this listing;
 * presumably p is returned. */
98 struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
100 struct ip_conntrack_protocol *p;
102 READ_LOCK(&ip_conntrack_lock);
103 p = __ip_ct_find_proto(protocol);
104 READ_UNLOCK(&ip_conntrack_lock);
/* Release a reference on ct by passing its first info slot, infos[0], to
 * nf_conntrack_put().  Asserts that this slot has its master pointer set.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing. */
109 ip_conntrack_put(struct ip_conntrack *ct)
112 IP_NF_ASSERT(ct->infos[0].master);
113 /* nf_conntrack_put wants to go via an info struct, so feed it
115 nf_conntrack_put(&ct->infos[0]);
/* Seed mixed into the tuple hash, and a flag recording that the seed has
 * been set; init_conntrack() fills both in on first use. */
118 static int ip_conntrack_hash_rnd_initted;
119 static unsigned int ip_conntrack_hash_rnd;
/* Map a tuple to a hash-chain index below ip_conntrack_htable_size.
 * Three words go through jhash_3words() with ip_conntrack_hash_rnd as the
 * seed: the source address, the destination address xor the protocol
 * number, and src.u.all combined with dst.u.all shifted left by 16.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing. */
122 hash_conntrack(const struct ip_conntrack_tuple *tuple)
127 return (jhash_3words(tuple->src.ip,
128 (tuple->dst.ip ^ tuple->dst.protonum),
129 (tuple->src.u.all | (tuple->dst.u.all << 16)),
130 ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
/* Fill *tuple from an IP header, then let the protocol handler add its
 * own part.
 *   iph      - IP header supplying both addresses and the protocol number
 *   skb      - packet, handed on to protocol->pkt_to_tuple()
 *   dataoff  - offset handed on to protocol->pkt_to_tuple()
 *   tuple    - output
 *   protocol - handler whose pkt_to_tuple() finishes the tuple
 * On the normal path the result of protocol->pkt_to_tuple() is returned.
 * NOTE(review): the return-type line and most of the fragment branch are
 * not visible in this listing; presumably that branch bails out after
 * logging -- confirm. */
134 get_tuple(const struct iphdr *iph,
135 const struct sk_buff *skb,
136 unsigned int dataoff,
137 struct ip_conntrack_tuple *tuple,
138 const struct ip_conntrack_protocol *protocol)
/* A nonzero fragment offset is reported through printk. */
141 if (iph->frag_off & htons(IP_OFFSET)) {
142 printk("ip_conntrack_core: Frag of proto %u.\n",
147 tuple->src.ip = iph->saddr;
148 tuple->dst.ip = iph->daddr;
149 tuple->dst.protonum = iph->protocol;
/* Clear both per-protocol unions before the handler fills them in. */
150 tuple->src.u.all = tuple->dst.u.all = 0;
152 return protocol->pkt_to_tuple(skb, dataoff, tuple);
/* Build in *inverse the tuple seen in the opposite direction of *orig:
 * source and destination addresses are exchanged, the protocol number is
 * copied, both per-protocol unions are zeroed, and protocol->invert_tuple()
 * completes the job.  The result of that callback is returned.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing. */
156 invert_tuple(struct ip_conntrack_tuple *inverse,
157 const struct ip_conntrack_tuple *orig,
158 const struct ip_conntrack_protocol *protocol)
160 inverse->src.ip = orig->dst.ip;
161 inverse->dst.ip = orig->src.ip;
162 inverse->dst.protonum = orig->dst.protonum;
164 inverse->src.u.all = inverse->dst.u.all = 0;
166 return protocol->invert_tuple(inverse, orig);
170 /* ip_conntrack_expect helper functions */
172 /* Compare tuple parts depending on mask. */
/* List-search predicate for expectations: returns the result of
 * ip_ct_tuple_mask_cmp() on tuple, the tuple stored in i and the mask
 * stored in i.  Asserts that ip_conntrack_expect_tuple_lock is held for
 * reading. */
173 static inline int expect_cmp(const struct ip_conntrack_expect *i,
174 const struct ip_conntrack_tuple *tuple)
176 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
177 return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
/* Checks the preconditions for disposing of an expectation: its use count
 * is zero and its timer is not pending (both asserted).
 * NOTE(review): the return-type line and the remaining statements are not
 * visible in this listing; presumably exp is freed here -- confirm. */
181 destroy_expect(struct ip_conntrack_expect *exp)
183 DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
184 IP_NF_ASSERT(atomic_read(&exp->use) == 0);
185 IP_NF_ASSERT(!timer_pending(&exp->timeout));
/* Drop one reference on exp by atomically decrementing its use count.
 * NOTE(review): the statement executed once the count reaches zero is not
 * visible in this listing; presumably destroy_expect() -- confirm. */
190 inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
194 if (atomic_dec_and_test(&exp->use)) {
195 /* usage count dropped to zero */
/* Search the global expectation list for an entry that expect_cmp()
 * accepts for tuple and return what LIST_FIND yields.  Both
 * ip_conntrack_lock and ip_conntrack_expect_tuple_lock must be held for
 * reading (asserted). */
200 static inline struct ip_conntrack_expect *
201 __ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
203 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
204 MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
205 return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
206 struct ip_conntrack_expect *, tuple);
209 /* Find an expectation corresponding to a tuple. */
/* Takes both locks for reading, looks the expectation up with
 * __ip_ct_expect_find() and increments its use count before unlocking.
 * NOTE(review): the line between the lookup and the increment (presumably
 * a NULL check) and the return statement are not visible in this
 * listing. */
210 struct ip_conntrack_expect *
211 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
213 struct ip_conntrack_expect *exp;
215 READ_LOCK(&ip_conntrack_lock);
216 READ_LOCK(&ip_conntrack_expect_tuple_lock);
217 exp = __ip_ct_expect_find(tuple);
219 atomic_inc(&exp->use);
220 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
221 READ_UNLOCK(&ip_conntrack_lock);
226 /* remove one specific expectation from all lists and drop refcount,
227 * does _NOT_ delete the timer. */
/* The caller must hold ip_conntrack_lock for writing (asserted).  The
 * expectation must not have a sibling yet (asserted). */
228 static void __unexpect_related(struct ip_conntrack_expect *expect)
230 DEBUGP("unexpect_related(%p)\n", expect);
231 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
233 /* we're not allowed to unexpect a confirmed expectation! */
234 IP_NF_ASSERT(!expect->sibling);
236 /* delete from global and local lists */
237 list_del(&expect->list);
238 list_del(&expect->expected_list);
240 /* decrement expect-count of master conntrack */
241 if (expect->expectant)
242 expect->expectant->expecting--;
/* Release the reference that kept the expectation on the lists. */
244 ip_conntrack_expect_put(expect);
247 /* remove one specific expectation from all lists, drop refcount
249 * This function can _NOT_ be called for confirmed expects! */
/* Asserts that the expectation has an expectant conntrack and that this
 * conntrack has a helper, then tries to stop the timer (when the helper
 * uses timeouts) before calling __unexpect_related(). */
250 static void unexpect_related(struct ip_conntrack_expect *expect)
252 IP_NF_ASSERT(expect->expectant);
253 IP_NF_ASSERT(expect->expectant->helper);
254 /* if we are supposed to have a timer, but we can't delete
255 * it: race condition. __unexpect_related will
256 * be called by timeout function */
257 if (expect->expectant->helper->timeout
258 && !del_timer(&expect->timeout))
/* NOTE(review): the statement controlled by the condition above is not
 * visible in this listing; presumably an early return -- confirm. */
261 __unexpect_related(expect);
264 /* delete all unconfirmed expectations for this conntrack */
/* Walks ct->sibling_list with the deletion-safe iterator.  Entries that are
 * already established are skipped; the others are asserted to be on the
 * global expectation list and to belong to ct, then removed through
 * unexpect_related().
 * NOTE(review): the guards around the skip branch are not visible in this
 * listing, and neither is any use of drop_refcount; presumably it decides
 * whether the expectant reference of a skipped entry is dropped and
 * cleared -- confirm against the full file. */
265 static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
267 struct list_head *exp_entry, *next;
268 struct ip_conntrack_expect *exp;
270 DEBUGP("remove_expectations(%p)\n", ct);
272 list_for_each_safe(exp_entry, next, &ct->sibling_list) {
273 exp = list_entry(exp_entry, struct ip_conntrack_expect,
276 /* we skip established expectations, as we want to delete
277 * the un-established ones only */
279 DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
281 /* Indicate that this expectations parent is dead */
282 ip_conntrack_put(exp->expectant);
283 exp->expectant = NULL;
288 IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
289 IP_NF_ASSERT(exp->expectant == ct);
291 /* delete expectation from global and private lists */
292 unexpect_related(exp);
/* Unlink ct from the hash table and get rid of its pending expectations.
 * Both tuple hashes (original and reply direction) are deleted from the
 * chains selected by hash_conntrack().  The caller must hold
 * ip_conntrack_lock for writing (asserted).
 * NOTE(review): the return-type line and the declaration of ho and hr are
 * not visible in this listing. */
297 clean_from_lists(struct ip_conntrack *ct)
301 DEBUGP("clean_from_lists(%p)\n", ct);
302 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
304 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
305 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
306 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
307 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
309 /* Destroy all un-established, pending expectations */
310 remove_expectations(ct, 1);
/* Destructor for a conntrack; init_conntrack() installs it as
 * ct_general.destroy.  Visible steps: assert that the use count is zero
 * and the timer is idle; look up the protocol handler of the reply tuple;
 * call the optional ip_conntrack_destroyed hook; under the write lock
 * remove leftover expectations and detach from the master expectation;
 * after unlocking, put the master conntrack, return ct to the slab cache
 * and decrement the global count.
 * NOTE(review): several short lines are absent from this listing: the
 * return-type line, the statement controlled by the proto->destroy test,
 * the guards in front of remove_expectations() and of the master handling,
 * and the guard in front of the final ip_conntrack_put(master). */
314 destroy_conntrack(struct nf_conntrack *nfct)
316 struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
317 struct ip_conntrack_protocol *proto;
319 DEBUGP("destroy_conntrack(%p)\n", ct);
320 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
321 IP_NF_ASSERT(!timer_pending(&ct->timeout));
323 /* To make sure we don't get any weird locking issues here:
324 * destroy_conntrack() MUST NOT be called with a write lock
325 * to ip_conntrack_lock!!! -HW */
326 proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
327 if (proto && proto->destroy)
330 if (ip_conntrack_destroyed)
331 ip_conntrack_destroyed(ct);
333 WRITE_LOCK(&ip_conntrack_lock);
334 /* Make sure don't leave any orphaned expectations lying around */
336 remove_expectations(ct, 1);
338 /* Delete our master expectation */
340 if (ct->master->expectant) {
341 /* can't call __unexpect_related here,
342 * since it would screw up expect_list */
343 list_del(&ct->master->expected_list);
/* Remember the master conntrack so its reference can be put after the
 * lock has been released. */
344 master = ct->master->expectant;
348 WRITE_UNLOCK(&ip_conntrack_lock);
351 ip_conntrack_put(master);
353 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
354 kmem_cache_free(ip_conntrack_cachep, ct);
355 atomic_dec(&ip_conntrack_count);
/* Timer callback of a conntrack; init_conntrack() installs it as
 * timeout.function with the conntrack pointer as timer data.
 * ul_conntrack is that pointer carried as an unsigned long.  The conntrack
 * is removed from all lists under the write lock, then one reference to it
 * is dropped. */
358 static void death_by_timeout(unsigned long ul_conntrack)
360 struct ip_conntrack *ct = (void *)ul_conntrack;
362 WRITE_LOCK(&ip_conntrack_lock);
363 clean_from_lists(ct);
364 WRITE_UNLOCK(&ip_conntrack_lock);
365 ip_conntrack_put(ct);
/* Hash-chain search predicate: true when entry i does not belong to
 * ignored_conntrack and its tuple equals tuple.  Asserts that
 * ip_conntrack_lock is held for reading.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing. */
369 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
370 const struct ip_conntrack_tuple *tuple,
371 const struct ip_conntrack *ignored_conntrack)
373 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
374 return i->ctrack != ignored_conntrack
375 && ip_ct_tuple_equal(tuple, &i->tuple);
/* Look tuple up in the hash chain selected by hash_conntrack(), passing
 * ignored_conntrack on to the search predicate.  The caller must hold
 * ip_conntrack_lock for reading (asserted).
 * NOTE(review): the predicate argument of LIST_FIND and the return
 * statement are not visible in this listing; presumably
 * conntrack_tuple_cmp is the predicate and h is returned -- confirm. */
378 static struct ip_conntrack_tuple_hash *
379 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
380 const struct ip_conntrack *ignored_conntrack)
382 struct ip_conntrack_tuple_hash *h;
383 unsigned int hash = hash_conntrack(tuple);
385 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
386 h = LIST_FIND(&ip_conntrack_hash[hash],
388 struct ip_conntrack_tuple_hash *,
389 tuple, ignored_conntrack);
393 /* Find a connection corresponding to a tuple. */
/* Takes ip_conntrack_lock for reading, looks the tuple up with
 * __ip_conntrack_find() and increments the use count of the owning
 * conntrack before unlocking.
 * NOTE(review): the line between the lookup and the increment (presumably
 * a NULL check) and the return statement are not visible in this
 * listing. */
394 struct ip_conntrack_tuple_hash *
395 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
396 const struct ip_conntrack *ignored_conntrack)
398 struct ip_conntrack_tuple_hash *h;
400 READ_LOCK(&ip_conntrack_lock);
401 h = __ip_conntrack_find(tuple, ignored_conntrack);
403 atomic_inc(&h->ctrack->ct_general.use);
404 READ_UNLOCK(&ip_conntrack_lock);
/* Recover the conntrack that owns an nf_ct_info slot (its master pointer)
 * and store in *ctinfo the index of that slot within ct->infos.  The index
 * is asserted to lie in [0, IP_CT_NUMBER).
 * NOTE(review): the return statement is not visible in this listing;
 * presumably ct is returned. */
409 static inline struct ip_conntrack *
410 __ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
412 struct ip_conntrack *ct
413 = (struct ip_conntrack *)nfct->master;
415 /* ctinfo is the index of the nfct inside the conntrack */
416 *ctinfo = nfct - ct->infos;
417 IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
421 /* Return conntrack and conntrack_info given skb->nfct->master */
/* Thin wrapper that applies __ip_conntrack_get() to skb->nfct.
 * NOTE(review): a line between the signature and the return below, and
 * anything after it, is not visible in this listing; presumably a check
 * that skb->nfct is set -- confirm. */
422 struct ip_conntrack *
423 ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
426 return __ip_conntrack_get(skb->nfct, ctinfo);
430 /* Confirm a connection given skb->nfct; places it in hash table */
/* Visible steps: recover the conntrack and ctinfo from nfct; do nothing
 * special for packets that are not in the original direction; compute the
 * hash of both tuples; under the write lock check that neither tuple is
 * already in its chain; if so, link both tuple hashes, start the timeout
 * timer, take an extra reference and set the CONFIRMED status bit.
 * NOTE(review): the return-type line, every return statement, the
 * predicate argument of both LIST_FIND calls and the end of one comment
 * are not visible in this listing. */
432 __ip_conntrack_confirm(struct nf_ct_info *nfct)
434 unsigned int hash, repl_hash;
435 struct ip_conntrack *ct;
436 enum ip_conntrack_info ctinfo;
438 ct = __ip_conntrack_get(nfct, &ctinfo);
440 /* ipt_REJECT uses ip_conntrack_attach to attach related
441 ICMP/TCP RST packets in other direction. Actual packet
442 which created connection will be IP_CT_NEW or for an
443 expected connection, IP_CT_RELATED. */
444 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
447 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
448 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
450 /* We're not in hash table, and we refuse to set up related
451 connections for unconfirmed conns. But packet copies and
452 REJECT will give spurious warnings here. */
453 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
455 /* No external references means noone else could have
457 IP_NF_ASSERT(!is_confirmed(ct));
458 DEBUGP("Confirming conntrack %p\n", ct);
460 WRITE_LOCK(&ip_conntrack_lock);
461 /* See if there's one in the list already, including reverse:
462 NAT could have grabbed it without realizing, since we're
463 not in the hash. If there is, we lost race. */
464 if (!LIST_FIND(&ip_conntrack_hash[hash],
466 struct ip_conntrack_tuple_hash *,
467 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
468 && !LIST_FIND(&ip_conntrack_hash[repl_hash],
470 struct ip_conntrack_tuple_hash *,
471 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
472 list_prepend(&ip_conntrack_hash[hash],
473 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
474 list_prepend(&ip_conntrack_hash[repl_hash],
475 &ct->tuplehash[IP_CT_DIR_REPLY]);
476 /* Timer relative to confirmation time, not original
477 setting time, otherwise we'd get timer wrap in
478 weird delay cases. */
479 ct->timeout.expires += jiffies;
480 add_timer(&ct->timeout);
/* Extra reference taken together with starting the timer. */
481 atomic_inc(&ct->ct_general.use);
482 set_bit(IPS_CONFIRMED_BIT, &ct->status);
483 WRITE_UNLOCK(&ip_conntrack_lock);
/* Path taken when one of the tuples was already present. */
487 WRITE_UNLOCK(&ip_conntrack_lock);
/* Looks tuple up with __ip_conntrack_find() while holding
 * ip_conntrack_lock for reading; entries owned by ignored_conntrack do not
 * count.
 * NOTE(review): the return-type line, the end of the comment below and the
 * return statement are not visible in this listing; presumably the result
 * says whether h was found -- confirm. */
491 /* Returns true if a connection correspondings to the tuple (required
494 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
495 const struct ip_conntrack *ignored_conntrack)
497 struct ip_conntrack_tuple_hash *h;
499 READ_LOCK(&ip_conntrack_lock);
500 h = __ip_conntrack_find(tuple, ignored_conntrack);
501 READ_UNLOCK(&ip_conntrack_lock);
506 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
/* Visible steps: copy the ICMP header and the embedded IP header out of
 * the packet; accept only the five ICMP error types listed; ignore
 * embedded fragments; build the tuple of the embedded packet and invert
 * it; look up the connection it belongs to; set *ctinfo to IP_CT_RELATED,
 * adding IP_CT_IS_REPLY depending on the direction of the match; attach
 * the matching info slot to skb->nfct.
 * NOTE(review): the declarations of inside and dataoff, all return
 * statements and the if/else lines that separate the two lookups and the
 * two direction tests are not visible in this listing. */
507 struct ip_conntrack *
508 icmp_error_track(struct sk_buff *skb,
509 enum ip_conntrack_info *ctinfo,
510 unsigned int hooknum)
512 struct ip_conntrack_tuple innertuple, origtuple;
517 struct ip_conntrack_protocol *innerproto;
518 struct ip_conntrack_tuple_hash *h;
521 IP_NF_ASSERT(skb->nfct == NULL);
523 /* Not enough header? */
524 if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
/* Only these ICMP error types are handled. */
527 if (inside.icmp.type != ICMP_DEST_UNREACH
528 && inside.icmp.type != ICMP_SOURCE_QUENCH
529 && inside.icmp.type != ICMP_TIME_EXCEEDED
530 && inside.icmp.type != ICMP_PARAMETERPROB
531 && inside.icmp.type != ICMP_REDIRECT)
534 /* Ignore ICMP's containing fragments (shouldn't happen) */
535 if (inside.ip.frag_off & htons(IP_OFFSET)) {
536 DEBUGP("icmp_error_track: fragment of proto %u\n",
541 innerproto = ip_ct_find_proto(inside.ip.protocol);
/* Offset of the data that follows the embedded IP header: outer IP
 * header, ICMP header, embedded IP header. */
542 dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
543 /* Are they talking about one of our connections? */
544 if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
545 DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
549 /* Ordinarily, we'd expect the inverted tupleproto, but it's
550 been preserved inside the ICMP. */
551 if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
552 DEBUGP("icmp_error_track: Can't invert tuple\n");
556 *ctinfo = IP_CT_RELATED;
558 h = ip_conntrack_find_get(&innertuple, NULL);
560 /* Locally generated ICMPs will match inverted if they
561 haven't been SNAT'ed yet */
562 /* FIXME: NAT code has to handle half-done double NAT --RR */
563 if (hooknum == NF_IP_LOCAL_OUT)
564 h = ip_conntrack_find_get(&origtuple, NULL);
567 DEBUGP("icmp_error_track: no match\n");
570 /* Reverse direction from that found */
571 if (DIRECTION(h) != IP_CT_DIR_REPLY)
572 *ctinfo += IP_CT_IS_REPLY;
574 if (DIRECTION(h) == IP_CT_DIR_REPLY)
575 *ctinfo += IP_CT_IS_REPLY;
578 /* Update skb to refer to this connection */
579 skb->nfct = &h->ctrack->infos[*ctinfo];
583 /* There's a small race here where we may free a just-assured
584 connection. Too bad: we're in trouble anyway. */
/* Search predicate used by early_drop(): true when the ASSURED status bit
 * of the conntrack owning entry i is clear. */
585 static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
587 return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
/* Try to evict one conntrack from a hash chain.  Under the read lock the
 * chain is searched with LIST_FIND_B for an entry accepted by unreplied()
 * and a reference is taken on its conntrack.  If the timer of that
 * conntrack can be deleted, death_by_timeout() is run by hand; afterwards
 * the reference is dropped again.
 * NOTE(review): the guards after the lookup and every line that computes
 * or returns the result are not visible in this listing; init_conntrack()
 * treats a zero result as failure. */
590 static int early_drop(struct list_head *chain)
592 /* Traverse backwards: gives us oldest, which is roughly LRU */
593 struct ip_conntrack_tuple_hash *h;
596 READ_LOCK(&ip_conntrack_lock);
597 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
599 atomic_inc(&h->ctrack->ct_general.use);
600 READ_UNLOCK(&ip_conntrack_lock);
605 if (del_timer(&h->ctrack->timeout)) {
606 death_by_timeout((unsigned long)h->ctrack);
609 ip_conntrack_put(h->ctrack);
/* Broadcom-only section, compiled when CONFIG_MIPS_BRCM is defined. */
613 #if defined(CONFIG_MIPS_BRCM)
/* Search predicate handed to LIST_FIND_B by regardless_drop().
 * NOTE(review): the body is not visible in this listing; presumably it
 * accepts every entry -- confirm. */
614 static inline int regardless(const struct ip_conntrack_tuple_hash *i)
/* Same sequence as early_drop(), but the entry is chosen with the
 * regardless() predicate instead of unreplied(): find an entry from the
 * tail of the chain under the read lock, take a reference, run
 * death_by_timeout() by hand if the timer could be deleted, drop the
 * reference.
 * NOTE(review): the guards after the lookup, every line that computes or
 * returns the result and the closing #endif are not visible in this
 * listing; init_conntrack() loops while the result is zero. */
619 static int regardless_drop(struct list_head *chain)
621 /* Traverse backwards: gives us oldest, which is roughly LRU */
622 struct ip_conntrack_tuple_hash *h;
625 READ_LOCK(&ip_conntrack_lock);
626 h = LIST_FIND_B(chain, regardless, struct ip_conntrack_tuple_hash *);
628 atomic_inc(&h->ctrack->ct_general.use);
629 READ_UNLOCK(&ip_conntrack_lock);
634 if (del_timer(&h->ctrack->timeout)) {
635 death_by_timeout((unsigned long)h->ctrack);
638 ip_conntrack_put(h->ctrack);
/* List-search predicate for helpers: returns the result of
 * ip_ct_tuple_mask_cmp() on rtuple, the tuple stored in helper i and the
 * mask stored in helper i. */
643 static inline int helper_cmp(const struct ip_conntrack_helper *i,
644 const struct ip_conntrack_tuple *rtuple)
646 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
/* Search the list of registered helpers with helper_cmp() and return what
 * LIST_FIND yields.
 * NOTE(review): the last argument line of the LIST_FIND call is not
 * visible in this listing; presumably it passes tuple. */
649 struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
651 return LIST_FIND(&helpers, helper_cmp,
652 struct ip_conntrack_helper *,
656 /* Allocate a new conntrack: we return -ENOMEM if classification
657 failed due to stress. Otherwise it really is unclassifiable. */
/* Visible steps: seed the hash on first use; when the table has reached
 * ip_conntrack_max try to evict an entry; invert the tuple; allocate and
 * zero a conntrack holding one reference; run protocol->new(); prepare
 * (but do not start) the timeout timer; under the write lock search for a
 * matching expectation and either attach the new conntrack to it or look
 * up a helper; increment the global count; after unlocking call the
 * expectfn callback of the expectation.
 * Returns the ORIGINAL-direction tuple hash of the new conntrack, or
 * ERR_PTR(-ENOMEM) when allocation fails or the table is full.
 * NOTE(review): many short lines are absent from this listing: the
 * parameter line declaring skb, the declarations of i and hash, braces,
 * several guards, some returns and the #else / #endif directives. */
658 static struct ip_conntrack_tuple_hash *
659 init_conntrack(const struct ip_conntrack_tuple *tuple,
660 struct ip_conntrack_protocol *protocol,
663 struct ip_conntrack *conntrack;
664 struct ip_conntrack_tuple repl_tuple;
666 struct ip_conntrack_expect *expected;
/* Rotating bucket index used to pick a chain to evict from. */
668 static unsigned int drop_next;
/* One-time random seed for hash_conntrack(). */
670 if (!ip_conntrack_hash_rnd_initted) {
671 get_random_bytes(&ip_conntrack_hash_rnd, 4);
672 ip_conntrack_hash_rnd_initted = 1;
675 hash = hash_conntrack(tuple);
/* Limit enabled (nonzero) and reached? */
677 if (ip_conntrack_max &&
678 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
679 /* Try dropping from random chain, or else from the
680 chain about to put into (in case they're trying to
681 bomb one hash chain). */
682 unsigned int next = (drop_next++)%ip_conntrack_htable_size;
684 if (!early_drop(&ip_conntrack_hash[next])
685 && !early_drop(&ip_conntrack_hash[hash])) {
/* NOTE(review): the #else and #endif of the conditional below are not
 * visible; presumably the warning and the -ENOMEM return belong to the
 * branch built without CONFIG_MIPS_BRCM -- confirm. */
686 #if defined(CONFIG_MIPS_BRCM)
687 /* Sorry, we have to kick one out regardless. */
688 while (!regardless_drop(&ip_conntrack_hash[next]))
689 next = (drop_next++)%ip_conntrack_htable_size;
693 "ip_conntrack: table full, dropping"
695 return ERR_PTR(-ENOMEM);
700 if (!invert_tuple(&repl_tuple, tuple, protocol)) {
701 DEBUGP("Can't invert tuple.\n");
/* GFP_ATOMIC allocation from the conntrack slab cache. */
705 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
707 DEBUGP("Can't allocate conntrack.\n");
708 return ERR_PTR(-ENOMEM);
/* Start from a zeroed object that holds a single reference. */
711 memset(conntrack, 0, sizeof(*conntrack));
712 atomic_set(&conntrack->ct_general.use, 1);
713 conntrack->ct_general.destroy = destroy_conntrack;
714 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
715 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
716 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
717 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
/* Every info slot points back at this conntrack. */
718 for (i=0; i < IP_CT_NUMBER; i++)
719 conntrack->infos[i].master = &conntrack->ct_general;
/* The protocol handler may refuse the connection; the object is then
 * handed back to the slab cache. */
721 if (!protocol->new(conntrack, skb)) {
722 kmem_cache_free(ip_conntrack_cachep, conntrack);
725 /* Don't set timer yet: wait for confirmation */
726 init_timer(&conntrack->timeout);
727 conntrack->timeout.data = (unsigned long)conntrack;
728 conntrack->timeout.function = death_by_timeout;
730 INIT_LIST_HEAD(&conntrack->sibling_list);
732 WRITE_LOCK(&ip_conntrack_lock);
733 /* Need finding and deleting of expected ONLY if we win race */
734 READ_LOCK(&ip_conntrack_expect_tuple_lock);
735 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
736 struct ip_conntrack_expect *, tuple);
737 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
739 /* If master is not in hash table yet (ie. packet hasn't left
740 this machine yet), how can other end know about expected?
741 Hence these are not the droids you are looking for (if
742 master ct never got confirmed, we'd hold a reference to it
743 and weird things would happen to future packets). */
744 if (expected && !is_confirmed(expected->expectant))
747 /* Look up the conntrack helper for master connections only */
749 conntrack->helper = ip_ct_find_helper(&repl_tuple);
751 /* If the expectation is dying, then this is a loser. */
753 && expected->expectant->helper->timeout
754 && ! del_timer(&expected->timeout))
758 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
759 conntrack, expected);
760 /* Welcome, Mr. Bond. We've been expecting you... */
761 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
/* Cross-link the new conntrack and the expectation it fulfils. */
762 conntrack->master = expected;
763 expected->sibling = conntrack;
764 #if CONFIG_IP_NF_CONNTRACK_MARK
765 conntrack->mark = expected->expectant->mark;
/* The expectation leaves the global list and no longer counts as pending
 * for its expectant. */
767 LIST_DELETE(&ip_conntrack_expect_list, expected);
768 expected->expectant->expecting--;
/* Take a reference on the master conntrack. */
769 nf_conntrack_get(&master_ct(conntrack)->infos[0]);
771 atomic_inc(&ip_conntrack_count);
772 WRITE_UNLOCK(&ip_conntrack_lock);
/* The expectation callback runs after the lock has been released. */
774 if (expected && expected->expectfn)
775 expected->expectfn(conntrack);
776 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
779 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/* Visible steps: assert the packet is not a fragment; build the tuple of
 * the packet with get_tuple(); look it up, creating a new conntrack via
 * init_conntrack() when needed; classify the packet into *ctinfo (reply
 * direction, established, related, or new) and attach the matching info
 * slot to skb->nfct.
 * NOTE(review): one parameter line, the guards around the lookup and
 * init_conntrack(), the lines that report whether the reply bit should be
 * set, the branch for new connections and every return statement are not
 * visible in this listing. */
780 static inline struct ip_conntrack *
781 resolve_normal_ct(struct sk_buff *skb,
782 struct ip_conntrack_protocol *proto,
784 unsigned int hooknum,
785 enum ip_conntrack_info *ctinfo)
787 struct ip_conntrack_tuple tuple;
788 struct ip_conntrack_tuple_hash *h;
790 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
792 if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
795 /* look for tuple match */
796 h = ip_conntrack_find_get(&tuple, NULL);
798 h = init_conntrack(&tuple, proto, skb);
805 /* It exists; we have (non-exclusive) reference. */
806 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
807 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
808 /* Please set reply bit if this packet OK */
811 /* Once we've had two way comms, always ESTABLISHED. */
812 if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
813 DEBUGP("ip_conntrack_in: normal packet for %p\n",
815 *ctinfo = IP_CT_ESTABLISHED;
816 } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
817 DEBUGP("ip_conntrack_in: related packet for %p\n",
819 *ctinfo = IP_CT_RELATED;
821 DEBUGP("ip_conntrack_in: new packet for %p\n",
/* *ctinfo doubles as the index of the info slot attached to the packet. */
827 skb->nfct = &h->ctrack->infos[*ctinfo];
831 /* Netfilter hook itself. */
/* Visible steps: report fragments with a rate-limited message; flag the
 * packet in nfcache; look up the protocol handler; give ICMP packets to
 * icmp_error_track() first; otherwise resolve or create the conntrack with
 * resolve_normal_ct(); run the protocol packet() callback and then the
 * helper of the conntrack, releasing skb->nfct after either one on a path
 * whose condition is not shown; finally set the SEEN_REPLY status bit.
 * NOTE(review): the declarations of set_reply and ret, all return
 * statements, the conditions in front of both nf_conntrack_put() calls and
 * of the final set_bit(), and any preprocessor directives around the
 * broadcast diagnostics are not visible in this listing.  The comment in
 * front of those diagnostics suggests they may be compiled out --
 * confirm against the full file. */
832 unsigned int ip_conntrack_in(unsigned int hooknum,
833 struct sk_buff **pskb,
834 const struct net_device *in,
835 const struct net_device *out,
836 int (*okfn)(struct sk_buff *))
838 struct ip_conntrack *ct;
839 enum ip_conntrack_info ctinfo;
840 struct ip_conntrack_protocol *proto;
845 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
846 if (net_ratelimit()) {
847 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
848 (*pskb)->nh.iph->protocol, hooknum);
853 /* FIXME: Do this right please. --RR */
854 (*pskb)->nfcache |= NFC_UNKNOWN;
856 /* Doesn't cover locally-generated broadcast, so not worth it. */
858 /* Ignore broadcast: no `connection'. */
859 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
860 printk("Broadcast packet!\n");
862 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
863 == htonl(0x000000FF)) {
864 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
865 NIPQUAD((*pskb)->nh.iph->saddr),
866 NIPQUAD((*pskb)->nh.iph->daddr),
867 (*pskb)->sk, (*pskb)->pkt_type);
871 /* Previously seen (loopback or untracked)? Ignore. */
875 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
877 /* It may be an icmp error... */
878 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
879 && icmp_error_track(*pskb, &ctinfo, hooknum))
882 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
883 /* Not valid part of a connection */
887 /* Too stressed to deal. */
890 IP_NF_ASSERT((*pskb)->nfct);
/* Per-protocol state tracking for this packet. */
892 ret = proto->packet(ct, *pskb, ctinfo);
895 nf_conntrack_put((*pskb)->nfct);
896 (*pskb)->nfct = NULL;
/* The helper only runs when the verdict so far is not NF_DROP. */
900 if (ret != NF_DROP && ct->helper) {
901 ret = ct->helper->help(*pskb, ct, ctinfo);
904 nf_conntrack_put((*pskb)->nfct);
905 (*pskb)->nfct = NULL;
910 set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
/* Invert a tuple without the caller naming the protocol handler: the
 * handler is looked up from orig->dst.protonum with ip_ct_find_proto() and
 * passed to invert_tuple(), whose result is returned. */
915 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
916 const struct ip_conntrack_tuple *orig)
918 return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
/* Search predicate used by ip_conntrack_expect_related() to recognise an
 * expectation that was registered before.  Expectation i matches when its
 * mask equals mask and tuple equals either i->tuple (while
 * i->ct_tuple.dst.protonum is still 0) or i->ct_tuple (once that protocol
 * number is nonzero). */
921 static inline int resent_expect(const struct ip_conntrack_expect *i,
922 const struct ip_conntrack_tuple *tuple,
923 const struct ip_conntrack_tuple *mask)
925 DEBUGP("resent_expect\n");
926 DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
927 DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
928 DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
929 return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
930 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
931 && ip_ct_tuple_equal(&i->mask, mask));
934 /* Would two expected things clash? */
/* Search predicate: builds the field-by-field AND of the mask stored in i
 * and the mask passed in, then returns the result of
 * ip_ct_tuple_mask_cmp() on the tuple stored in i and the tuple passed in
 * under that combined mask. */
935 static inline int expect_clash(const struct ip_conntrack_expect *i,
936 const struct ip_conntrack_tuple *tuple,
937 const struct ip_conntrack_tuple *mask)
939 /* Part covered by intersection of masks must be unequal,
940 otherwise they clash */
941 struct ip_conntrack_tuple intersect_mask
942 = { { i->mask.src.ip & mask->src.ip,
943 { i->mask.src.u.all & mask->src.u.all } },
944 { i->mask.dst.ip & mask->dst.ip,
945 { i->mask.dst.u.all & mask->dst.u.all },
946 i->mask.dst.protonum & mask->dst.protonum } };
948 return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
/* Locking wrapper: runs unexpect_related() on expect while holding
 * ip_conntrack_lock for writing. */
951 inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
953 WRITE_LOCK(&ip_conntrack_lock);
954 unexpect_related(expect);
955 WRITE_UNLOCK(&ip_conntrack_lock);
/* Timer callback of an expectation; ip_conntrack_expect_insert() installs
 * it as timeout.function with the expectation pointer as timer data.
 * ul_expect is that pointer carried as an unsigned long.  The expectation
 * is removed with __unexpect_related() under the write lock. */
958 static void expectation_timed_out(unsigned long ul_expect)
960 struct ip_conntrack_expect *expect = (void *) ul_expect;
962 DEBUGP("expectation %p timed out\n", expect);
963 WRITE_LOCK(&ip_conntrack_lock);
964 __unexpect_related(expect);
965 WRITE_UNLOCK(&ip_conntrack_lock);
/* Allocate an expectation with kmalloc(GFP_ATOMIC) and zero it.
 * NOTE(review): the allocation-failure test, its return and the final
 * return are not visible in this listing; presumably NULL is returned on
 * failure and the new object otherwise -- confirm. */
968 struct ip_conntrack_expect *
969 ip_conntrack_expect_alloc(void)
971 struct ip_conntrack_expect *new;
973 new = (struct ip_conntrack_expect *)
974 kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
976 DEBUGP("expect_related: OOM allocating expect\n");
980 /* tuple_cmp compares whole union, we have to initialized cleanly */
981 memset(new, 0, sizeof(struct ip_conntrack_expect));
/* Attach expectation new to conntrack related_to: record the expectant,
 * set the use count to 1, append to the sibling list of related_to,
 * prepend to the global expectation list, start a timer when the helper
 * defines a timeout, and increment the pending-expectation counter of
 * related_to.  The only caller visible in this listing,
 * ip_conntrack_expect_related(), holds ip_conntrack_lock for writing.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing. */
987 ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
988 struct ip_conntrack *related_to)
990 DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
991 new->expectant = related_to;
993 atomic_set(&new->use, 1);
995 /* add to expected list for this connection */
996 list_add_tail(&new->expected_list, &related_to->sibling_list);
997 /* add to global list of expectations */
998 list_prepend(&ip_conntrack_expect_list, &new->list);
999 /* add and start timer if required */
1000 if (related_to->helper->timeout) {
1001 init_timer(&new->timeout);
1002 new->timeout.data = (unsigned long)new;
1003 new->timeout.function = expectation_timed_out;
/* The helper timeout is multiplied by HZ, so it is a number of seconds. */
1004 new->timeout.expires = jiffies +
1005 related_to->helper->timeout * HZ;
1006 add_timer(&new->timeout);
1008 related_to->expecting++;
1011 /* Add a related connection. */
/* Register expect as an expectation of related_to.  Under the write lock
 * the visible code distinguishes these cases:
 *  - an equivalent expectation already exists (resent packet): when the
 *    helper uses timeouts its timer is restarted, unless the timer could
 *    not be deleted because the expectation is dying;
 *  - the helper limit max_expected has been reached: without the
 *    IP_CT_HELPER_F_REUSE_EXPECT flag the request is refused with a
 *    rate-limited message; with the flag an unestablished expectation found
 *    by walking the sibling list of related_to is evicted through
 *    unexpect_related();
 *  - the new tuple clashes with an existing expectation: refused;
 *  - otherwise, and at label out, expect is inserted with
 *    ip_conntrack_expect_insert().
 * NOTE(review): every return statement, the guard that tests old, the
 * branch taken for a dying expectation, the printk call line and the end
 * of one comment are not visible in this listing. */
1012 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
1013 struct ip_conntrack *related_to)
1015 struct ip_conntrack_expect *old;
1018 WRITE_LOCK(&ip_conntrack_lock);
1019 /* Because of the write lock, no reader can walk the lists,
1020 * so there is no need to use the tuple lock too */
1022 DEBUGP("ip_conntrack_expect_related %p\n", related_to);
1023 DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1024 DEBUGP("mask: "); DUMP_TUPLE_RAW(&expect->mask);
1026 old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
1027 struct ip_conntrack_expect *, &expect->tuple,
1030 /* Helper private data may contain offsets but no pointers
1031 pointing into the payload - otherwise we should have to copy
1032 the data filled out by the helper over the old one */
1033 DEBUGP("expect_related: resent packet\n");
1034 if (related_to->helper->timeout) {
1035 if (!del_timer(&old->timeout)) {
1036 /* expectation is dying. Fall through */
1039 old->timeout.expires = jiffies +
1040 related_to->helper->timeout * HZ;
1041 add_timer(&old->timeout);
1045 WRITE_UNLOCK(&ip_conntrack_lock);
1049 } else if (related_to->helper->max_expected &&
1050 related_to->expecting >= related_to->helper->max_expected) {
1052 if (!(related_to->helper->flags &
1053 IP_CT_HELPER_F_REUSE_EXPECT)) {
1054 WRITE_UNLOCK(&ip_conntrack_lock);
1055 if (net_ratelimit())
1057 "ip_conntrack: max number of expected "
1058 "connections %i of %s reached for "
1059 "%u.%u.%u.%u->%u.%u.%u.%u\n",
1060 related_to->helper->max_expected,
1061 related_to->helper->name,
1062 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1063 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1067 DEBUGP("ip_conntrack: max number of expected "
1068 "connections %i of %s reached for "
1069 "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1070 related_to->helper->max_expected,
1071 related_to->helper->name,
1072 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1073 NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1075 /* choose the oldest expectation to evict */
1076 list_for_each_entry(old, &related_to->sibling_list,
1078 if (old->sibling == NULL)
1081 /* We cannot fail since related_to->expecting is the number
1082 * of unconfirmed expectations */
1083 IP_NF_ASSERT(old && old->sibling == NULL);
1085 /* newnat14 does not reuse the real allocated memory
1086 * structures but rather unexpects the old and
1087 * allocates a new. unexpect_related will decrement
1088 * related_to->expecting.
1090 unexpect_related(old);
1092 } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1093 struct ip_conntrack_expect *, &expect->tuple,
1095 WRITE_UNLOCK(&ip_conntrack_lock);
1096 DEBUGP("expect_related: busy!\n");
1102 out: ip_conntrack_expect_insert(expect, related_to);
1104 WRITE_UNLOCK(&ip_conntrack_lock);
1109 /* Change tuple in an existing expectation */
/* The caller must hold ip_conntrack_lock for reading (asserted); the
 * function itself takes ip_conntrack_expect_tuple_lock for writing.
 * Visible logic: when expect->ct_tuple.dst.protonum is still 0 the
 * expectation has not been changed before; newtuple is then checked
 * against the current tuple and against clashes with other expectations,
 * the current tuple is saved in ct_tuple and newtuple becomes the tuple.
 * Otherwise the call is treated as a resent packet and newtuple is
 * compared with the current tuple.
 * NOTE(review): the declaration and every assignment of the result, the
 * else lines between the branches and the return statement are not
 * visible in this listing. */
1110 int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1111 struct ip_conntrack_tuple *newtuple)
1115 MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1116 WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1118 DEBUGP("change_expect:\n");
1119 DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1120 DEBUGP("exp mask: "); DUMP_TUPLE_RAW(&expect->mask);
1121 DEBUGP("newtuple: "); DUMP_TUPLE_RAW(newtuple);
1122 if (expect->ct_tuple.dst.protonum == 0) {
1123 /* Never seen before */
1124 DEBUGP("change expect: never seen before\n");
1125 if (!ip_ct_tuple_mask_cmp(&expect->tuple, newtuple, &expect->mask)
1126 && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1127 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1128 /* Force NAT to find an unused tuple */
/* Keep the previous tuple in ct_tuple, then install the new one. */
1131 memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1132 memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
1137 DEBUGP("change expect: resent packet\n");
1138 if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1141 /* Force NAT to choose again the same port */
1145 WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1150 /* Alter reply tuple (maybe alter helper). If it's already taken,
1151 return 0 and don't do alteration. */
/* Under the write lock: if newreply is already used by a conntrack other
 * than this one, unlock and give up.  Otherwise store newreply as the
 * REPLY-direction tuple and, when the conntrack has neither a master nor
 * any entry on its sibling list, look up its helper again from the new
 * tuple.  The conntrack is asserted to be unconfirmed.
 * NOTE(review): both return statements are not visible in this listing. */
1152 int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1153 const struct ip_conntrack_tuple *newreply)
1155 WRITE_LOCK(&ip_conntrack_lock);
1156 if (__ip_conntrack_find(newreply, conntrack)) {
1157 WRITE_UNLOCK(&ip_conntrack_lock);
1160 /* Should be unconfirmed, so not in hash table yet */
1161 IP_NF_ASSERT(!is_confirmed(conntrack));
1163 DEBUGP("Altering reply tuple of %p to ", conntrack);
1164 DUMP_TUPLE(newreply);
1166 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1167 if (!conntrack->master && list_empty(&conntrack->sibling_list))
1168 conntrack->helper = ip_ct_find_helper(newreply);
1169 WRITE_UNLOCK(&ip_conntrack_lock);
/* Register a conntrack helper: `me` is prepended to the global `helpers`
 * list while holding the ip_conntrack_lock write lock.
 * NOTE(review): the return statement is not visible in this excerpt. */
1174 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1176 WRITE_LOCK(&ip_conntrack_lock);
1177 list_prepend(&helpers, me);
1178 WRITE_UNLOCK(&ip_conntrack_lock);
/* Per-entry callback passed to LIST_FIND_W by
 * ip_conntrack_helper_unregister(): when the conntrack behind hash entry
 * `i` is using helper `me`, its expectations are removed and its helper
 * pointer is cleared, in that order.
 * NOTE(review): the return value is not visible in this excerpt. */
1183 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1184 const struct ip_conntrack_helper *me)
1186 if (i->ctrack->helper == me) {
1187 /* Get rid of any expected. */
1188 remove_expectations(i->ctrack, 0);
1189 /* And *then* set helper to NULL */
1190 i->ctrack->helper = NULL;
/* Unregister helper `me`: under the ip_conntrack_lock write lock it is
 * deleted from the `helpers` list, then every bucket of ip_conntrack_hash
 * is walked with unhelp() so that no conntrack keeps pointing at it. */
1195 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1199 /* Need write lock here, to delete helper. */
1200 WRITE_LOCK(&ip_conntrack_lock);
1201 LIST_DELETE(&helpers, me);
1203 /* Get rid of expecteds, set helpers to NULL. */
1204 for (i = 0; i < ip_conntrack_htable_size; i++)
1205 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1206 struct ip_conntrack_tuple_hash *, me);
1207 WRITE_UNLOCK(&ip_conntrack_lock);
/* NOTE(review): the statement that follows the comment below (presumably
 * a wait for bottom halves to finish) is not visible in this excerpt. */
1209 /* Someone could be still looking at the helper in a bh. */
1213 /* Refresh conntrack for this many jiffies. */
/*
 * ct:            conntrack whose timeout is updated; its timer data must
 *                point back at ct (asserted below).
 * extra_jiffies: new lifetime, relative to now.
 *
 * Two cases are visible:
 *  - unconfirmed ct: the relative value is stored in timeout.expires as
 *    is (no `jiffies +`), and no lock is taken for it;
 *  - otherwise: under the ip_conntrack_lock write lock the timer is
 *    re-armed with an absolute expiry, but only if del_timer() succeeded.
 * NOTE(review): the else/braces joining the two cases are not visible in
 * this excerpt.
 */
1214 void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
1216 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1218 /* If not in hash table, timer will not be active yet */
1219 if (!is_confirmed(ct))
1220 ct->timeout.expires = extra_jiffies;
1222 WRITE_LOCK(&ip_conntrack_lock);
1223 /* Need del_timer for race avoidance (may already be dying). */
1224 if (del_timer(&ct->timeout)) {
1225 ct->timeout.expires = jiffies + extra_jiffies;
1226 add_timer(&ct->timeout);
1228 WRITE_UNLOCK(&ip_conntrack_lock);
1232 /* Returns new sk_buff, or NULL */
/*
 * Passes skb through ip_defrag() and then fixes up the resulting skb:
 * the owning socket saved before the call is re-attached with
 * skb_set_owner_w(), the IP header checksum is recomputed, and the
 * NFC_ALTERED bit is set in nfcache.
 * NOTE(review): the checks around ip_defrag() and skb_set_owner_w(), and
 * the return statements, are not visible in this excerpt.
 */
1234 ip_ct_gather_frags(struct sk_buff *skb)
/* Saved up front because `skb` is reassigned by ip_defrag() below. */
1236 struct sock *sk = skb->sk;
1237 #ifdef CONFIG_NETFILTER_DEBUG
1238 unsigned int olddebug = skb->nf_debug;
1246 skb = ip_defrag(skb);
1256 skb_set_owner_w(skb, sk);
1260 ip_send_check(skb->nh.iph);
1261 skb->nfcache |= NFC_ALTERED;
1262 #ifdef CONFIG_NETFILTER_DEBUG
1263 /* Packet path as if nothing had happened. */
1264 skb->nf_debug = olddebug;
1269 /* Used by ipt_REJECT. */
/*
 * Attaches the conntrack referenced by `nfct` to the new skb `nskb`.
 * The ctinfo slot is chosen from the direction of the original packet:
 * IP_CT_DIR_ORIGINAL selects IP_CT_RELATED + IP_CT_IS_REPLY, and the
 * other assignment visible below selects plain IP_CT_RELATED (the else
 * joining them is not visible in this excerpt).  The conntrack's use
 * count is incremented for the new skb.
 * Installed as the ip_ct_attach hook by ip_conntrack_init().
 */
1270 static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1272 struct ip_conntrack *ct;
1273 enum ip_conntrack_info ctinfo;
1275 ct = __ip_conntrack_get(nfct, &ctinfo);
1277 /* This ICMP is in reverse direction to the packet which
1279 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1280 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1282 ctinfo = IP_CT_RELATED;
1284 /* Attach new skbuff, and increment count */
1285 nskb->nfct = &ct->infos[ctinfo];
1286 atomic_inc(&ct->ct_general.use);
/* Adapter used as the LIST_FIND predicate in get_next_corpse(): applies
 * the caller-supplied kill() callback to the conntrack that owns hash
 * entry `i` and returns its result unchanged. */
1290 do_kill(const struct ip_conntrack_tuple_hash *i,
1291 int (*kill)(const struct ip_conntrack *i, void *data),
1294 return kill(i->ctrack, data);
1297 /* Bring out ya dead! */
/*
 * Scans ip_conntrack_hash, starting at bucket *bucket, for an entry
 * selected by do_kill()/kill() and returns it (NULL when the scan reaches
 * ip_conntrack_htable_size without a hit).  The scan runs under the
 * ip_conntrack_lock read lock, and the use count of the found conntrack
 * is incremented before the lock is dropped (the guard around the
 * atomic_inc is not visible in this excerpt).
 *
 * *bucket is advanced in place so the caller can resume the scan.
 * NOTE(review): the for-loop increment also runs after the iteration that
 * found an entry, so *bucket already points past the bucket of the hit;
 * further matching entries in that same bucket look like they are skipped
 * on the next call.  Confirm whether callers compensate for this.
 */
1298 static struct ip_conntrack_tuple_hash *
1299 get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1300 void *data, unsigned int *bucket)
1302 struct ip_conntrack_tuple_hash *h = NULL;
1304 READ_LOCK(&ip_conntrack_lock);
1305 for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
1306 h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
1307 struct ip_conntrack_tuple_hash *, kill, data);
1310 atomic_inc(&h->ctrack->ct_general.use);
1311 READ_UNLOCK(&ip_conntrack_lock);
/*
 * Destroys every conntrack selected by the kill() predicate.
 * kill: callback applied to each conntrack (via get_next_corpse()); the
 *       entries it selects are the ones handed back to this loop.
 *
 * For each returned entry: if its timeout timer could still be deleted,
 * death_by_timeout() is invoked directly; otherwise the already-running
 * timer is left to do the job.  The reference obtained from
 * get_next_corpse() is dropped with ip_conntrack_put() either way.
 */
1317 ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1320 struct ip_conntrack_tuple_hash *h;
1321 unsigned int bucket = 0;
1323 while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
1324 /* Time to push up daisies... */
1325 if (del_timer(&h->ctrack->timeout))
1326 death_by_timeout((unsigned long)h->ctrack);
1327 /* ... else the timer will get him soon. */
1329 ip_conntrack_put(h->ctrack);
/*
 * getorigdst - handler for the SO_ORIGINAL_DST socket option (see
 * so_getorigdst).
 *
 * sk:   socket being queried; only sockets whose protocol name is "TCP"
 *       are accepted, anything else yields -ENOPROTOOPT.
 * user: user-space buffer that receives a struct sockaddr_in.
 * len:  in: size of the user buffer; must be at least
 *       sizeof(struct sockaddr_in).
 *
 * A tuple is built from the socket's own addresses and ports, looked up
 * with ip_conntrack_find_get(), and the ORIGINAL-direction destination
 * ip/port of the conntrack found is copied out.
 * NOTE(review): several return statements are not visible in this
 * excerpt; confirm the error codes against the full source.
 */
1333 /* Fast function for those who don't want to parse /proc (and I don't
1335 /* Reversing the socket's dst/src point of view gives us the reply
1338 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1340 struct inet_opt *inet = inet_sk(sk);
1341 struct ip_conntrack_tuple_hash *h;
1342 struct ip_conntrack_tuple tuple;
1344 IP_CT_TUPLE_U_BLANK(&tuple);
1345 tuple.src.ip = inet->rcv_saddr;
1346 tuple.src.u.tcp.port = inet->sport;
1347 tuple.dst.ip = inet->daddr;
1348 tuple.dst.u.tcp.port = inet->dport;
1349 tuple.dst.protonum = IPPROTO_TCP;
1351 /* We only do TCP at the moment: is there a better way? */
1352 if (strcmp(sk->sk_prot->name, "TCP")) {
1353 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1354 return -ENOPROTOOPT;
/* Reject user buffers too small to hold the result. */
1357 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1358 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1359 *len, sizeof(struct sockaddr_in));
/* Presumably returns the entry with a reference held; it is dropped by
 * the ip_conntrack_put() below once the values have been copied into
 * the local `sin`. */
1363 h = ip_conntrack_find_get(&tuple, NULL);
1365 struct sockaddr_in sin;
1367 sin.sin_family = AF_INET;
1368 sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1369 .tuple.dst.u.tcp.port;
1370 sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1373 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1374 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1375 ip_conntrack_put(h->ctrack);
/* NOTE(review): only sin_family, sin_port and sin_addr.s_addr are
 * assigned in the lines visible here, yet sizeof(sin) bytes are copied
 * to user space.  Confirm that the remaining bytes of `sin` are cleared
 * somewhere; if not, uninitialised kernel stack bytes are exposed. */
1376 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1381 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1382 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1383 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
/* Socket-option descriptor for SO_ORIGINAL_DST.  The "get" range is
 * given as SO_ORIGINAL_DST .. SO_ORIGINAL_DST+1 (presumably a half-open
 * range covering just that one option - confirm against nf_sockopt_ops).
 * Registered by ip_conntrack_init() and unregistered by
 * ip_conntrack_cleanup().  Remaining fields are not visible here. */
1387 static struct nf_sockopt_ops so_getorigdst = {
1389 .get_optmin = SO_ORIGINAL_DST,
1390 .get_optmax = SO_ORIGINAL_DST+1,
/* Predicate handed to ip_ct_selective_cleanup() by ip_conntrack_cleanup().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * selects every conntrack. */
1394 static int kill_all(const struct ip_conntrack *i, void *data)
1399 /* Mishearing the voices in his head, our hero wonders how he's
1400 supposed to kill the mall. */
/*
 * Tears down connection tracking.  Visible steps, in order:
 *  1. clear the ip_ct_attach hook;
 *  2. run ip_ct_selective_cleanup(kill_all, NULL) and jump back to the
 *     i_see_dead_people label while ip_conntrack_count is still non-zero
 *     (the label itself is not visible in this excerpt);
 *  3. destroy the conntrack slab cache, free the hash table and
 *     unregister the SO_ORIGINAL_DST socket option.
 */
1401 void ip_conntrack_cleanup(void)
1403 ip_ct_attach = NULL;
1404 /* This makes sure all current packets have passed through
1405 netfilter framework. Roll on, two-stage module
1410 ip_ct_selective_cleanup(kill_all, NULL);
1411 if (atomic_read(&ip_conntrack_count) != 0) {
1413 goto i_see_dead_people;
1416 kmem_cache_destroy(ip_conntrack_cachep);
1417 vfree(ip_conntrack_hash);
1418 nf_unregister_sockopt(&so_getorigdst);
1421 static int hashsize;
1422 MODULE_PARM(hashsize, "i");
1424 int __init ip_conntrack_init(void)
1429 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1430 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
/*
 * Overview of ip_conntrack_init() as far as visible in this excerpt:
 * size the hash table, register the SO_ORIGINAL_DST socket option,
 * allocate the hash table and the conntrack slab cache, link the
 * built-in protocols, initialise the buckets, install the ip_ct_attach
 * hook and set up the static "untracked" conntrack.
 *
 * Sizing: the `hashsize` module parameter is copied into
 * ip_conntrack_htable_size (presumably only when it is non-zero - the
 * condition is not visible here).  The computed default is
 * (physical memory in bytes / 16384) / sizeof(struct list_head), fixed
 * at 8192 when num_physpages exceeds 1GB worth of pages, and never less
 * than 16.  ip_conntrack_max is 8 entries per bucket.
 */
1432 ip_conntrack_htable_size = hashsize;
1434 ip_conntrack_htable_size
1435 = (((num_physpages << PAGE_SHIFT) / 16384)
1436 / sizeof(struct list_head));
1437 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1438 ip_conntrack_htable_size = 8192;
1439 if (ip_conntrack_htable_size < 16)
1440 ip_conntrack_htable_size = 16;
1442 ip_conntrack_max = 8 * ip_conntrack_htable_size;
/* NOTE(review): the body of this CONFIG_MIPS_BRCM section is not visible
 * in this excerpt. */
1444 #ifdef CONFIG_MIPS_BRCM
1447 printk("ip_conntrack version %s (%u buckets, %d max)"
1448 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1449 ip_conntrack_htable_size, ip_conntrack_max,
1450 sizeof(struct ip_conntrack));
1452 ret = nf_register_sockopt(&so_getorigdst);
1454 printk(KERN_ERR "Unable to register netfilter socket option\n");
/* One list head per bucket; the heads are initialised further down. */
1458 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1459 * ip_conntrack_htable_size);
1460 if (!ip_conntrack_hash) {
1461 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1462 goto err_unreg_sockopt;
1465 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1466 sizeof(struct ip_conntrack), 0,
1467 SLAB_HWCACHE_ALIGN, NULL, NULL);
1468 if (!ip_conntrack_cachep) {
1469 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1472 /* Don't NEED lock here, but good form anyway. */
1473 WRITE_LOCK(&ip_conntrack_lock);
1474 /* Sew in builtin protocols. */
1475 list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1476 list_append(&protocol_list, &ip_conntrack_protocol_udp);
1477 list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1478 list_append(&protocol_list, &ip_conntrack_protocol_esp);
1479 WRITE_UNLOCK(&ip_conntrack_lock);
1481 for (i = 0; i < ip_conntrack_htable_size; i++)
1482 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1484 /* For use by ipt_REJECT */
1485 ip_ct_attach = ip_conntrack_attach;
1487 /* Set up fake conntrack:
1488 - to never be deleted, not in any hashes */
1489 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1490 /* - and make it look like a confirmed connection */
1491 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1492 /* - and prepare the ctinfo field for REJECT & NAT. */
1493 ip_conntrack_untracked.infos[IP_CT_NEW].master =
1494 ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1495 ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1496 &ip_conntrack_untracked.ct_general;
/* Error unwinding: the hash table is freed and the socket option is
 * unregistered, i.e. the reverse of the order in which they were set up.
 * The labels and the final return are not visible in this excerpt. */
1501 vfree(ip_conntrack_hash);
1503 nf_unregister_sockopt(&so_getorigdst);