/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */
5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */
20 #include <linux/types.h>
21 #include <linux/icmp.h>
23 #include <linux/netfilter.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/module.h>
26 #include <linux/skbuff.h>
27 #include <linux/proc_fs.h>
28 #include <linux/vmalloc.h>
29 #include <net/checksum.h>
31 #include <linux/stddef.h>
32 #include <linux/sysctl.h>
33 #include <linux/slab.h>
34 #include <linux/random.h>
35 #include <linux/jhash.h>
36 #include <linux/err.h>
37 #include <linux/percpu.h>
38 #include <linux/moduleparam.h>
39 #include <linux/notifier.h>
41 /* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers. */
43 #include <linux/netfilter_ipv4/ip_conntrack.h>
44 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
45 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
46 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
48 #define IP_CONNTRACK_VERSION "2.4"
53 #define DEBUGP(format, args...)
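/* DEBUGP compiles away entirely, so its arguments are never evaluated;
 * debug statements can therefore reference stale identifiers without
 * breaking the build. */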
56 DEFINE_RWLOCK(ip_conntrack_lock);
58 /* ip_conntrack_standalone needs this */
59 atomic_t ip_conntrack_count = ATOMIC_INIT(0);
61 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
62 LIST_HEAD(ip_conntrack_expect_list);
63 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
64 static LIST_HEAD(helpers);
65 unsigned int ip_conntrack_htable_size __read_mostly = 0;
66 int ip_conntrack_max __read_mostly;
67 struct list_head *ip_conntrack_hash __read_mostly;
68 static struct kmem_cache *ip_conntrack_cachep __read_mostly;
69 static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
70 struct ip_conntrack ip_conntrack_untracked;
71 unsigned int ip_ct_log_invalid __read_mostly;
72 static LIST_HEAD(unconfirmed);
73 static int ip_conntrack_vmalloc __read_mostly;
75 static unsigned int ip_conntrack_next_id;
76 static unsigned int ip_conntrack_expect_next_id;
77 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
78 ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
79 ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
81 DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
83 /* deliver cached events and clear cache entry - must be called with locally
84 * disabled softirqs */
static inline void
__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
88 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
89 if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
90 atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
93 ip_conntrack_put(ecache->ct);
97 /* Deliver all cached events for a particular conntrack. This is called
98 * by code prior to async packet handling or freeing the skb */
99 void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
101 struct ip_conntrack_ecache *ecache;
104 ecache = &__get_cpu_var(ip_conntrack_ecache);
105 if (ecache->ct == ct)
106 __ip_ct_deliver_cached_events(ecache);
110 void __ip_ct_event_cache_init(struct ip_conntrack *ct)
112 struct ip_conntrack_ecache *ecache;
114 /* take care of delivering potentially old events */
115 ecache = &__get_cpu_var(ip_conntrack_ecache);
116 BUG_ON(ecache->ct == ct);
118 __ip_ct_deliver_cached_events(ecache);
119 /* initialize for this conntrack/packet */
121 nf_conntrack_get(&ct->ct_general);
/* flush the event cache - touches other CPUs' data and must not be called while
125 * packets are still passing through the code */
126 static void ip_ct_event_cache_flush(void)
128 struct ip_conntrack_ecache *ecache;
131 for_each_possible_cpu(cpu) {
132 ecache = &per_cpu(ip_conntrack_ecache, cpu);
134 ip_conntrack_put(ecache->ct);
138 static inline void ip_ct_event_cache_flush(void) {}
139 #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
141 DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
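/* Per-CPU statistics, updated via CONNTRACK_STAT_INC(); the standalone
 * module exports them to userspace (e.g. as /proc/net/stat/ip_conntrack). */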
143 static int ip_conntrack_hash_rnd_initted;
144 static unsigned int ip_conntrack_hash_rnd;
146 static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
147 unsigned int size, unsigned int rnd)
149 return (jhash_3words((__force u32)tuple->src.ip,
150 ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     rnd) % size);
}

static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
158 return __hash_conntrack(tuple, ip_conntrack_htable_size,
159 ip_conntrack_hash_rnd);
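/* Illustrative example (not normative): for the TCP tuple
 * 10.0.0.1:1025 -> 192.168.0.1:80, the three words fed to jhash_3words()
 * above are src.ip, dst.ip ^ IPPROTO_TCP, and the source and destination
 * ports packed into the low and high halves of one 32-bit word.  The
 * boot-time random seed keeps attackers from precomputing tuples that
 * all land in one hash chain. */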
int
ip_ct_get_tuple(const struct iphdr *iph,
164 const struct sk_buff *skb,
165 unsigned int dataoff,
166 struct ip_conntrack_tuple *tuple,
167 const struct ip_conntrack_protocol *protocol)
170 if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
176 tuple->src.ip = iph->saddr;
177 tuple->dst.ip = iph->daddr;
178 tuple->dst.protonum = iph->protocol;
179 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
181 return protocol->pkt_to_tuple(skb, dataoff, tuple);
int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
186 const struct ip_conntrack_tuple *orig,
187 const struct ip_conntrack_protocol *protocol)
189 inverse->src.ip = orig->dst.ip;
190 inverse->dst.ip = orig->src.ip;
191 inverse->dst.protonum = orig->dst.protonum;
192 inverse->dst.dir = !orig->dst.dir;
194 return protocol->invert_tuple(inverse, orig);
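/* Illustrative example: inverting the original tuple
 * 10.0.0.1:1025 -> 192.168.0.1:80/TCP gives 192.168.0.1:80 ->
 * 10.0.0.1:1025/TCP (the protocol hook swaps the ports), which is what
 * packets travelling the reply path will match. */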
198 /* ip_conntrack_expect helper functions */
199 void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
201 IP_NF_ASSERT(!timer_pending(&exp->timeout));
202 list_del(&exp->list);
203 CONNTRACK_STAT_INC(expect_delete);
204 exp->master->expecting--;
205 ip_conntrack_expect_put(exp);
208 static void expectation_timed_out(unsigned long ul_expect)
210 struct ip_conntrack_expect *exp = (void *)ul_expect;
212 write_lock_bh(&ip_conntrack_lock);
213 ip_ct_unlink_expect(exp);
214 write_unlock_bh(&ip_conntrack_lock);
215 ip_conntrack_expect_put(exp);
218 struct ip_conntrack_expect *
219 __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
221 struct ip_conntrack_expect *i;
223 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
224 if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
/* Just find an expectation corresponding to a tuple. */
231 struct ip_conntrack_expect *
232 ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
234 struct ip_conntrack_expect *i;
236 read_lock_bh(&ip_conntrack_lock);
237 i = __ip_conntrack_expect_find(tuple);
240 read_unlock_bh(&ip_conntrack_lock);
/* If an expectation for this connection is found, it gets deleted from
 * the global list, then returned. */
247 static struct ip_conntrack_expect *
248 find_expectation(const struct ip_conntrack_tuple *tuple)
250 struct ip_conntrack_expect *i;
252 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		/* If the master is not in the hash table yet (ie. the packet
		   hasn't left this machine yet), how could the other end
		   know about the expected connection?  Hence these are not
		   the droids you are looking for (if the master ct never
		   got confirmed, we'd hold a reference to it and weird
		   things would happen to future packets). */
258 if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
259 && is_confirmed(i->master)) {
260 if (i->flags & IP_CT_EXPECT_PERMANENT) {
263 } else if (del_timer(&i->timeout)) {
264 ip_ct_unlink_expect(i);
272 /* delete all expectations for this conntrack */
273 void ip_ct_remove_expectations(struct ip_conntrack *ct)
275 struct ip_conntrack_expect *i, *tmp;
	/* Optimization: most connections never expect any others. */
278 if (ct->expecting == 0)
281 list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
282 if (i->master == ct && del_timer(&i->timeout)) {
283 ip_ct_unlink_expect(i);
284 ip_conntrack_expect_put(i);
290 clean_from_lists(struct ip_conntrack *ct)
292 DEBUGP("clean_from_lists(%p)\n", ct);
293 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
294 list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
296 /* Destroy all pending expectations */
297 ip_ct_remove_expectations(ct);
301 destroy_conntrack(struct nf_conntrack *nfct)
303 struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
304 struct ip_conntrack_protocol *proto;
305 struct ip_conntrack_helper *helper;
307 DEBUGP("destroy_conntrack(%p)\n", ct);
308 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
309 IP_NF_ASSERT(!timer_pending(&ct->timeout));
311 ip_conntrack_event(IPCT_DESTROY, ct);
312 set_bit(IPS_DYING_BIT, &ct->status);
315 if (helper && helper->destroy)
318 /* To make sure we don't get any weird locking issues here:
319 * destroy_conntrack() MUST NOT be called with a write lock
320 * to ip_conntrack_lock!!! -HW */
321 proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
322 if (proto && proto->destroy)
325 if (ip_conntrack_destroyed)
326 ip_conntrack_destroyed(ct);
328 write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before the connection is in the list, so we need to clean
	 * here, too. */
333 ip_ct_remove_expectations(ct);
335 /* We overload first tuple to link into unconfirmed list. */
336 if (!is_confirmed(ct)) {
337 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
338 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
341 CONNTRACK_STAT_INC(delete);
342 write_unlock_bh(&ip_conntrack_lock);
345 ip_conntrack_put(ct->master);
347 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
348 ip_conntrack_free(ct);
351 static void death_by_timeout(unsigned long ul_conntrack)
353 struct ip_conntrack *ct = (void *)ul_conntrack;
355 write_lock_bh(&ip_conntrack_lock);
356 /* Inside lock so preempt is disabled on module removal path.
357 * Otherwise we can get spurious warnings. */
358 CONNTRACK_STAT_INC(delete_list);
359 clean_from_lists(ct);
360 write_unlock_bh(&ip_conntrack_lock);
361 ip_conntrack_put(ct);
364 struct ip_conntrack_tuple_hash *
365 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
366 const struct ip_conntrack *ignored_conntrack)
368 struct ip_conntrack_tuple_hash *h;
369 unsigned int hash = hash_conntrack(tuple);
371 list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
372 if (tuplehash_to_ctrack(h) != ignored_conntrack &&
373 ip_ct_tuple_equal(tuple, &h->tuple)) {
374 CONNTRACK_STAT_INC(found);
377 CONNTRACK_STAT_INC(searched);
383 /* Find a connection corresponding to a tuple. */
384 struct ip_conntrack_tuple_hash *
385 ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
386 const struct ip_conntrack *ignored_conntrack)
388 struct ip_conntrack_tuple_hash *h;
390 read_lock_bh(&ip_conntrack_lock);
391 h = __ip_conntrack_find(tuple, ignored_conntrack);
393 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
394 read_unlock_bh(&ip_conntrack_lock);
399 static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
401 unsigned int repl_hash)
403 ct->id = ++ip_conntrack_next_id;
404 list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
405 &ip_conntrack_hash[hash]);
406 list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
407 &ip_conntrack_hash[repl_hash]);
410 void ip_conntrack_hash_insert(struct ip_conntrack *ct)
412 unsigned int hash, repl_hash;
414 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
415 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
417 write_lock_bh(&ip_conntrack_lock);
418 __ip_conntrack_hash_insert(ct, hash, repl_hash);
419 write_unlock_bh(&ip_conntrack_lock);
422 /* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
426 unsigned int hash, repl_hash;
427 struct ip_conntrack_tuple_hash *h;
428 struct ip_conntrack *ct;
429 enum ip_conntrack_info ctinfo;
431 ct = ip_conntrack_get(*pskb, &ctinfo);
433 /* ipt_REJECT uses ip_conntrack_attach to attach related
434 ICMP/TCP RST packets in other direction. Actual packet
435 which created connection will be IP_CT_NEW or for an
436 expected connection, IP_CT_RELATED. */
437 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
440 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
441 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
443 /* We're not in hash table, and we refuse to set up related
444 connections for unconfirmed conns. But packet copies and
445 REJECT will give spurious warnings here. */
446 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
	/* No external references means no one else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
451 DEBUGP("Confirming conntrack %p\n", ct);
453 write_lock_bh(&ip_conntrack_lock);
455 /* See if there's one in the list already, including reverse:
456 NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost the race. */
458 list_for_each_entry(h, &ip_conntrack_hash[hash], list)
459 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
462 list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
463 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
467 /* Remove from unconfirmed list */
468 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
470 __ip_conntrack_hash_insert(ct, hash, repl_hash);
471 /* Timer relative to confirmation time, not original
472 setting time, otherwise we'd get timer wrap in
473 weird delay cases. */
474 ct->timeout.expires += jiffies;
475 add_timer(&ct->timeout);
476 atomic_inc(&ct->ct_general.use);
477 set_bit(IPS_CONFIRMED_BIT, &ct->status);
478 CONNTRACK_STAT_INC(insert);
479 write_unlock_bh(&ip_conntrack_lock);
481 ip_conntrack_event_cache(IPCT_HELPER, *pskb);
482 #ifdef CONFIG_IP_NF_NAT_NEEDED
483 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
484 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
485 ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
487 ip_conntrack_event_cache(master_ct(ct) ?
488 IPCT_RELATED : IPCT_NEW, *pskb);
493 CONNTRACK_STAT_INC(insert_failed);
494 write_unlock_bh(&ip_conntrack_lock);
/* Returns true if a connection corresponds to the tuple (required
   because of NAT). */
int
501 ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
502 const struct ip_conntrack *ignored_conntrack)
504 struct ip_conntrack_tuple_hash *h;
506 read_lock_bh(&ip_conntrack_lock);
507 h = __ip_conntrack_find(tuple, ignored_conntrack);
508 read_unlock_bh(&ip_conntrack_lock);
513 /* There's a small race here where we may free a just-assured
514 connection. Too bad: we're in trouble anyway. */
515 static int early_drop(struct list_head *chain)
517 /* Traverse backwards: gives us oldest, which is roughly LRU */
518 struct ip_conntrack_tuple_hash *h;
519 struct ip_conntrack *ct = NULL, *tmp;
522 read_lock_bh(&ip_conntrack_lock);
523 list_for_each_entry_reverse(h, chain, list) {
524 tmp = tuplehash_to_ctrack(h);
525 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
527 atomic_inc(&ct->ct_general.use);
531 read_unlock_bh(&ip_conntrack_lock);
536 if (del_timer(&ct->timeout)) {
537 death_by_timeout((unsigned long)ct);
539 CONNTRACK_STAT_INC(early_drop);
541 ip_conntrack_put(ct);
545 static struct ip_conntrack_helper *
__ip_conntrack_helper_find(const struct ip_conntrack_tuple *tuple)
548 struct ip_conntrack_helper *h;
550 list_for_each_entry(h, &helpers, list) {
551 if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
557 struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple)
560 struct ip_conntrack_helper *helper;
	/* need ip_conntrack_lock to ensure that the helper exists until
	 * try_module_get() is called */
564 read_lock_bh(&ip_conntrack_lock);
566 helper = __ip_conntrack_helper_find(tuple);
	/* need to increase the module usage count to ensure the helper
	 * will not go away while the caller is e.g. busy putting a
	 * conntrack into the hash that uses the helper */
571 if (!try_module_get(helper->me))
575 read_unlock_bh(&ip_conntrack_lock);
580 void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
582 module_put(helper->me);
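/* Typical caller pattern (sketch, error handling elided):
 *
 *	struct ip_conntrack_helper *helper;
 *
 *	helper = ip_conntrack_helper_find_get(&tuple);
 *	if (helper) {
 *		... use the helper; the module reference pins it ...
 *		ip_conntrack_helper_put(helper);
 *	}
 */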
585 struct ip_conntrack_protocol *
586 __ip_conntrack_proto_find(u_int8_t protocol)
588 return ip_ct_protos[protocol];
591 /* this is guaranteed to always return a valid protocol helper, since
592 * it falls back to generic_protocol */
593 struct ip_conntrack_protocol *
594 ip_conntrack_proto_find_get(u_int8_t protocol)
596 struct ip_conntrack_protocol *p;
599 p = __ip_conntrack_proto_find(protocol);
601 if (!try_module_get(p->me))
602 p = &ip_conntrack_generic_protocol;
609 void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
614 struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
615 struct ip_conntrack_tuple *repl)
617 struct ip_conntrack *conntrack;
619 if (!ip_conntrack_hash_rnd_initted) {
620 get_random_bytes(&ip_conntrack_hash_rnd, 4);
621 ip_conntrack_hash_rnd_initted = 1;
624 /* We don't want any race condition at early drop stage */
625 atomic_inc(&ip_conntrack_count);
628 && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
629 unsigned int hash = hash_conntrack(orig);
630 /* Try dropping from this hash chain. */
631 if (!early_drop(&ip_conntrack_hash[hash])) {
632 atomic_dec(&ip_conntrack_count);
635 "ip_conntrack: table full, dropping"
637 return ERR_PTR(-ENOMEM);
641 conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
643 DEBUGP("Can't allocate conntrack.\n");
644 atomic_dec(&ip_conntrack_count);
645 return ERR_PTR(-ENOMEM);
648 atomic_set(&conntrack->ct_general.use, 1);
649 conntrack->ct_general.destroy = destroy_conntrack;
650 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
651 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
652 /* Don't set timer yet: wait for confirmation */
653 init_timer(&conntrack->timeout);
654 conntrack->timeout.data = (unsigned long)conntrack;
655 conntrack->timeout.function = death_by_timeout;
661 ip_conntrack_free(struct ip_conntrack *conntrack)
663 atomic_dec(&ip_conntrack_count);
664 kmem_cache_free(ip_conntrack_cachep, conntrack);
667 /* Allocate a new conntrack: we return -ENOMEM if classification
668 * failed due to stress. Otherwise it really is unclassifiable */
669 static struct ip_conntrack_tuple_hash *
670 init_conntrack(struct ip_conntrack_tuple *tuple,
671 struct ip_conntrack_protocol *protocol,
674 struct ip_conntrack *conntrack;
675 struct ip_conntrack_tuple repl_tuple;
676 struct ip_conntrack_expect *exp;
678 if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
679 DEBUGP("Can't invert tuple.\n");
683 conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
684 if (conntrack == NULL || IS_ERR(conntrack))
685 return (struct ip_conntrack_tuple_hash *)conntrack;
687 if (!protocol->new(conntrack, skb)) {
688 ip_conntrack_free(conntrack);
692 write_lock_bh(&ip_conntrack_lock);
693 exp = find_expectation(tuple);
696 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
698 /* Welcome, Mr. Bond. We've been expecting you... */
699 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
700 conntrack->master = exp->master;
701 #ifdef CONFIG_IP_NF_CONNTRACK_MARK
702 conntrack->mark = exp->master->mark;
704 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
705 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place to put it */
707 conntrack->nat.masq_index = exp->master->nat.masq_index;
709 #ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
710 conntrack->secmark = exp->master->secmark;
712 nf_conntrack_get(&conntrack->master->ct_general);
713 CONNTRACK_STAT_INC(expect_new);
715 conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
717 CONNTRACK_STAT_INC(new);
720 /* Overload tuple linked list to put us in unconfirmed list. */
721 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
723 write_unlock_bh(&ip_conntrack_lock);
727 exp->expectfn(conntrack, exp);
728 ip_conntrack_expect_put(exp);
731 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
734 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
735 static inline struct ip_conntrack *
736 resolve_normal_ct(struct sk_buff *skb,
737 struct ip_conntrack_protocol *proto,
739 unsigned int hooknum,
740 enum ip_conntrack_info *ctinfo)
742 struct ip_conntrack_tuple tuple;
743 struct ip_conntrack_tuple_hash *h;
744 struct ip_conntrack *ct;
746 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
748 if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
752 /* look for tuple match */
753 h = ip_conntrack_find_get(&tuple, NULL);
755 h = init_conntrack(&tuple, proto, skb);
761 ct = tuplehash_to_ctrack(h);
763 /* It exists; we have (non-exclusive) reference. */
764 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
765 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set the reply bit if this packet is OK */
	/* Once we've had two-way comms, always ESTABLISHED. */
770 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
771 DEBUGP("ip_conntrack_in: normal packet for %p\n",
773 *ctinfo = IP_CT_ESTABLISHED;
774 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
775 DEBUGP("ip_conntrack_in: related packet for %p\n",
777 *ctinfo = IP_CT_RELATED;
779 DEBUGP("ip_conntrack_in: new packet for %p\n",
785 skb->nfct = &ct->ct_general;
786 skb->nfctinfo = *ctinfo;
790 /* Netfilter hook itself. */
791 unsigned int ip_conntrack_in(unsigned int hooknum,
792 struct sk_buff **pskb,
793 const struct net_device *in,
794 const struct net_device *out,
795 int (*okfn)(struct sk_buff *))
797 struct ip_conntrack *ct;
798 enum ip_conntrack_info ctinfo;
799 struct ip_conntrack_protocol *proto;
803 /* Previously seen (loopback or untracked)? Ignore. */
805 CONNTRACK_STAT_INC(ignore);
810 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
811 if (net_ratelimit()) {
812 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
813 (*pskb)->nh.iph->protocol, hooknum);
818 /* Doesn't cover locally-generated broadcast, so not worth it. */
820 /* Ignore broadcast: no `connection'. */
821 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
822 printk("Broadcast packet!\n");
824 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
825 == htonl(0x000000FF)) {
826 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
827 NIPQUAD((*pskb)->nh.iph->saddr),
828 NIPQUAD((*pskb)->nh.iph->daddr),
829 (*pskb)->sk, (*pskb)->pkt_type);
833 proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
	/* It may be a special packet: error, unclean...
	 * The inverse of the return code tells the netfilter
	 * core what to do with the packet. */
838 if (proto->error != NULL
839 && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
840 CONNTRACK_STAT_INC(error);
841 CONNTRACK_STAT_INC(invalid);
	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum, &ctinfo))) {
846 /* Not valid part of a connection */
847 CONNTRACK_STAT_INC(invalid);
852 /* Too stressed to deal. */
853 CONNTRACK_STAT_INC(drop);
857 IP_NF_ASSERT((*pskb)->nfct);
859 ret = proto->packet(ct, *pskb, ctinfo);
861 /* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
863 nf_conntrack_put((*pskb)->nfct);
864 (*pskb)->nfct = NULL;
865 CONNTRACK_STAT_INC(invalid);
869 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
870 ip_conntrack_event_cache(IPCT_STATUS, *pskb);
875 int invert_tuplepr(struct ip_conntrack_tuple *inverse,
876 const struct ip_conntrack_tuple *orig)
878 return ip_ct_invert_tuple(inverse, orig,
879 __ip_conntrack_proto_find(orig->dst.protonum));
882 /* Would two expected things clash? */
883 static inline int expect_clash(const struct ip_conntrack_expect *a,
884 const struct ip_conntrack_expect *b)
	/* If the parts covered by the intersection of the two masks
	   are equal, the expectations clash */
888 struct ip_conntrack_tuple intersect_mask
889 = { { a->mask.src.ip & b->mask.src.ip,
890 { a->mask.src.u.all & b->mask.src.u.all } },
891 { a->mask.dst.ip & b->mask.dst.ip,
892 { a->mask.dst.u.all & b->mask.dst.u.all },
893 a->mask.dst.protonum & b->mask.dst.protonum } };
895 return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
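/* Illustrative example: if expectation A's mask covers only dst.ip while
 * B's mask also covers the ports, the intersection covers only dst.ip,
 * so A and B clash exactly when their dst.ip fields match. */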
898 static inline int expect_matches(const struct ip_conntrack_expect *a,
899 const struct ip_conntrack_expect *b)
901 return a->master == b->master
902 && ip_ct_tuple_equal(&a->tuple, &b->tuple)
903 && ip_ct_tuple_equal(&a->mask, &b->mask);
906 /* Generally a bad idea to call this: could have matched already. */
907 void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
909 struct ip_conntrack_expect *i;
911 write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
913 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
914 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
915 ip_ct_unlink_expect(i);
916 write_unlock_bh(&ip_conntrack_lock);
917 ip_conntrack_expect_put(i);
921 write_unlock_bh(&ip_conntrack_lock);
/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations.  During conntrack destruction, the expectations are
 * always killed before the conntrack itself */
927 struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
929 struct ip_conntrack_expect *new;
931 new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
933 DEBUGP("expect_related: OOM allocating expect\n");
937 atomic_set(&new->use, 1);
941 void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
943 if (atomic_dec_and_test(&exp->use))
944 kmem_cache_free(ip_conntrack_expect_cachep, exp);
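/* Sketch of how a helper typically drives the expectation API
 * (illustrative, error handling elided):
 *
 *	struct ip_conntrack_expect *exp;
 *
 *	exp = ip_conntrack_expect_alloc(ct);
 *	if (exp) {
 *		... fill in exp->tuple, exp->mask, exp->expectfn ...
 *		ip_conntrack_expect_related(exp);
 *		ip_conntrack_expect_put(exp);
 *	}
 */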
947 static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
949 atomic_inc(&exp->use);
950 exp->master->expecting++;
951 list_add(&exp->list, &ip_conntrack_expect_list);
953 init_timer(&exp->timeout);
954 exp->timeout.data = (unsigned long)exp;
955 exp->timeout.function = expectation_timed_out;
956 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
957 add_timer(&exp->timeout);
959 exp->id = ++ip_conntrack_expect_next_id;
960 atomic_inc(&exp->use);
961 CONNTRACK_STAT_INC(expect_create);
964 /* Race with expectations being used means we could have none to find; OK. */
965 static void evict_oldest_expect(struct ip_conntrack *master)
967 struct ip_conntrack_expect *i;
969 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
970 if (i->master == master) {
971 if (del_timer(&i->timeout)) {
972 ip_ct_unlink_expect(i);
973 ip_conntrack_expect_put(i);
980 static inline int refresh_timer(struct ip_conntrack_expect *i)
982 if (!del_timer(&i->timeout))
985 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
986 add_timer(&i->timeout);
990 int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
992 struct ip_conntrack_expect *i;
	DEBUGP("ip_conntrack_expect_related %p\n", expect);
996 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
997 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
999 write_lock_bh(&ip_conntrack_lock);
1000 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
1001 if (expect_matches(i, expect)) {
		/* Refresh timer: if it's dying, ignore. */
1003 if (refresh_timer(i)) {
1007 } else if (expect_clash(i, expect)) {
1013 /* Will be over limit? */
1014 if (expect->master->helper->max_expected &&
1015 expect->master->expecting >= expect->master->helper->max_expected)
1016 evict_oldest_expect(expect->master);
1018 ip_conntrack_expect_insert(expect);
1019 ip_conntrack_expect_event(IPEXP_NEW, expect);
1022 write_unlock_bh(&ip_conntrack_lock);
1026 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1027 implicitly racy: see __ip_conntrack_confirm */
1028 void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1029 const struct ip_conntrack_tuple *newreply)
1031 write_lock_bh(&ip_conntrack_lock);
1032 /* Should be unconfirmed, so not in hash table yet */
1033 IP_NF_ASSERT(!is_confirmed(conntrack));
1035 DEBUGP("Altering reply tuple of %p to ", conntrack);
1036 DUMP_TUPLE(newreply);
1038 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1039 if (!conntrack->master && conntrack->expecting == 0)
1040 conntrack->helper = __ip_conntrack_helper_find(newreply);
1041 write_unlock_bh(&ip_conntrack_lock);
1044 int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1046 BUG_ON(me->timeout == 0);
1047 write_lock_bh(&ip_conntrack_lock);
1048 list_add(&me->list, &helpers);
1049 write_unlock_bh(&ip_conntrack_lock);
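/* Sketch of a registration by a helper module (hypothetical helper; see
 * the real ones, e.g. ip_conntrack_ftp, for the full set of fields):
 *
 *	static struct ip_conntrack_helper example_helper = {
 *		.name		= "example",
 *		.me		= THIS_MODULE,
 *		.max_expected	= 1,
 *		.timeout	= 5 * 60,
 *	};
 *
 *	ip_conntrack_helper_register(&example_helper);
 */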
1054 struct ip_conntrack_helper *
1055 __ip_conntrack_helper_find_byname(const char *name)
1057 struct ip_conntrack_helper *h;
1059 list_for_each_entry(h, &helpers, list) {
1060 if (!strcmp(h->name, name))
1067 static inline void unhelp(struct ip_conntrack_tuple_hash *i,
1068 const struct ip_conntrack_helper *me)
1070 if (tuplehash_to_ctrack(i)->helper == me) {
1071 ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
1072 tuplehash_to_ctrack(i)->helper = NULL;
1076 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1079 struct ip_conntrack_tuple_hash *h;
1080 struct ip_conntrack_expect *exp, *tmp;
1082 /* Need write lock here, to delete helper. */
1083 write_lock_bh(&ip_conntrack_lock);
1084 list_del(&me->list);
1086 /* Get rid of expectations */
1087 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
1088 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1089 ip_ct_unlink_expect(exp);
1090 ip_conntrack_expect_put(exp);
	/* Get rid of expected connections, set helpers to NULL. */
1094 list_for_each_entry(h, &unconfirmed, list)
1096 for (i = 0; i < ip_conntrack_htable_size; i++) {
1097 list_for_each_entry(h, &ip_conntrack_hash[i], list)
1100 write_unlock_bh(&ip_conntrack_lock);
	/* Someone could still be looking at the helper in a bh. */
1106 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1107 void __ip_ct_refresh_acct(struct ip_conntrack *ct,
1108 enum ip_conntrack_info ctinfo,
1109 const struct sk_buff *skb,
1110 unsigned long extra_jiffies,
1115 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1118 write_lock_bh(&ip_conntrack_lock);
1120 /* Only update if this is not a fixed timeout */
1121 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
1122 write_unlock_bh(&ip_conntrack_lock);
1126 /* If not in hash table, timer will not be active yet */
1127 if (!is_confirmed(ct)) {
1128 ct->timeout.expires = extra_jiffies;
1129 event = IPCT_REFRESH;
1131 /* Need del_timer for race avoidance (may already be dying). */
1132 if (del_timer(&ct->timeout)) {
1133 ct->timeout.expires = jiffies + extra_jiffies;
1134 add_timer(&ct->timeout);
1135 event = IPCT_REFRESH;
1139 #ifdef CONFIG_IP_NF_CT_ACCT
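	/* Per-direction packet/byte accounting; the high-bit test below
	 * raises IPCT_COUNTER_FILLING before the 32-bit counters can wrap,
	 * so that listeners (e.g. ctnetlink) have a chance to harvest them
	 * in time (assumption about the consumer, not enforced here). */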
1141 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1142 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1143 ntohs(skb->nh.iph->tot_len);
1144 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1145 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1146 event |= IPCT_COUNTER_FILLING;
1150 write_unlock_bh(&ip_conntrack_lock);
1152 /* must be unlocked when calling event cache */
1154 ip_conntrack_event_cache(event, skb);
1157 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1158 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and the like.  This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
1162 int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1163 const struct ip_conntrack_tuple *tuple)
1165 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
1166 &tuple->src.u.tcp.port);
1167 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
1168 &tuple->dst.u.tcp.port);
1175 int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1176 struct ip_conntrack_tuple *t)
1178 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1182 *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1184 *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1190 /* Returns new sk_buff, or NULL */
1192 ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
1197 skb = ip_defrag(skb, user);
1201 ip_send_check(skb->nh.iph);
1205 /* Used by ipt_REJECT. */
1206 static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1208 struct ip_conntrack *ct;
1209 enum ip_conntrack_info ctinfo;
	/* This ICMP is in the reverse direction to the packet which caused it */
1212 ct = ip_conntrack_get(skb, &ctinfo);
1214 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1215 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1217 ctinfo = IP_CT_RELATED;
1219 /* Attach to new skbuff, and increment count */
1220 nskb->nfct = &ct->ct_general;
1221 nskb->nfctinfo = ctinfo;
1222 nf_conntrack_get(nskb->nfct);
1225 /* Bring out ya dead! */
1226 static struct ip_conntrack *
1227 get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
1228 void *data, unsigned int *bucket)
1230 struct ip_conntrack_tuple_hash *h;
1231 struct ip_conntrack *ct;
1233 write_lock_bh(&ip_conntrack_lock);
1234 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
1235 list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
1236 ct = tuplehash_to_ctrack(h);
1241 list_for_each_entry(h, &unconfirmed, list) {
1242 ct = tuplehash_to_ctrack(h);
1246 write_unlock_bh(&ip_conntrack_lock);
1250 atomic_inc(&ct->ct_general.use);
1251 write_unlock_bh(&ip_conntrack_lock);
1256 ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
1258 struct ip_conntrack *ct;
1259 unsigned int bucket = 0;
1261 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
1263 if (del_timer(&ct->timeout))
1264 death_by_timeout((unsigned long)ct);
1265 /* ... else the timer will get him soon. */
1267 ip_conntrack_put(ct);
/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1278 struct inet_sock *inet = inet_sk(sk);
1279 struct ip_conntrack_tuple_hash *h;
1280 struct ip_conntrack_tuple tuple;
1282 IP_CT_TUPLE_U_BLANK(&tuple);
1283 tuple.src.ip = inet->rcv_saddr;
1284 tuple.src.u.tcp.port = inet->sport;
1285 tuple.dst.ip = inet->daddr;
1286 tuple.dst.u.tcp.port = inet->dport;
1287 tuple.dst.protonum = IPPROTO_TCP;
1289 /* We only do TCP at the moment: is there a better way? */
1290 if (strcmp(sk->sk_prot->name, "TCP")) {
1291 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1292 return -ENOPROTOOPT;
1295 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1296 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1297 *len, sizeof(struct sockaddr_in));
1301 h = ip_conntrack_find_get(&tuple, NULL);
1303 struct sockaddr_in sin;
1304 struct ip_conntrack *ct = tuplehash_to_ctrack(h);
1306 sin.sin_family = AF_INET;
1307 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1308 .tuple.dst.u.tcp.port;
1309 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1311 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
1313 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1314 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1315 ip_conntrack_put(ct);
1316 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1321 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1322 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1323 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
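/* Userspace counterpart (sketch): a transparent proxy recovers the
 * pre-NAT destination of an accepted connection with
 *
 *	struct sockaddr_in orig;
 *	socklen_t len = sizeof(orig);
 *	getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &orig, &len);
 */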
1327 static struct nf_sockopt_ops so_getorigdst = {
1329 .get_optmin = SO_ORIGINAL_DST,
1330 .get_optmax = SO_ORIGINAL_DST+1,
1334 static int kill_all(struct ip_conntrack *i, void *data)
1339 void ip_conntrack_flush(void)
1341 ip_ct_iterate_cleanup(kill_all, NULL);
1344 static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
1349 free_pages((unsigned long)hash,
1350 get_order(sizeof(struct list_head) * size));
1353 /* Mishearing the voices in his head, our hero wonders how he's
1354 supposed to kill the mall. */
1355 void ip_conntrack_cleanup(void)
1357 ip_ct_attach = NULL;
1359 /* This makes sure all current packets have passed through
	   the netfilter framework.  Roll on, two-stage module
	   removal... */
1364 ip_ct_event_cache_flush();
1366 ip_conntrack_flush();
1367 if (atomic_read(&ip_conntrack_count) != 0) {
1369 goto i_see_dead_people;
1371 /* wait until all references to ip_conntrack_untracked are dropped */
1372 while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
1375 kmem_cache_destroy(ip_conntrack_cachep);
1376 kmem_cache_destroy(ip_conntrack_expect_cachep);
1377 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1378 ip_conntrack_htable_size);
1379 nf_unregister_sockopt(&so_getorigdst);
1382 static struct list_head *alloc_hashtable(int size, int *vmalloced)
1384 struct list_head *hash;
1388 hash = (void*)__get_free_pages(GFP_KERNEL,
1389 get_order(sizeof(struct list_head)
		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
1394 hash = vmalloc(sizeof(struct list_head) * size);
1398 for (i = 0; i < size; i++)
1399 INIT_LIST_HEAD(&hash[i]);
1404 static int set_hashsize(const char *val, struct kernel_param *kp)
1406 int i, bucket, hashsize, vmalloced;
1407 int old_vmalloced, old_size;
1409 struct list_head *hash, *old_hash;
1410 struct ip_conntrack_tuple_hash *h;
1412 /* On boot, we can set this without any fancy locking. */
1413 if (!ip_conntrack_htable_size)
1414 return param_set_int(val, kp);
1416 hashsize = simple_strtol(val, NULL, 0);
1420 hash = alloc_hashtable(hashsize, &vmalloced);
	/* We have to rehash for the new table anyway, so we can also
	 * use a new random seed */
1426 get_random_bytes(&rnd, 4);
1428 write_lock_bh(&ip_conntrack_lock);
1429 for (i = 0; i < ip_conntrack_htable_size; i++) {
1430 while (!list_empty(&ip_conntrack_hash[i])) {
1431 h = list_entry(ip_conntrack_hash[i].next,
1432 struct ip_conntrack_tuple_hash, list);
1434 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1435 list_add_tail(&h->list, &hash[bucket]);
1438 old_size = ip_conntrack_htable_size;
1439 old_vmalloced = ip_conntrack_vmalloc;
1440 old_hash = ip_conntrack_hash;
1442 ip_conntrack_htable_size = hashsize;
1443 ip_conntrack_vmalloc = vmalloced;
1444 ip_conntrack_hash = hash;
1445 ip_conntrack_hash_rnd = rnd;
1446 write_unlock_bh(&ip_conntrack_lock);
1448 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1452 module_param_call(hashsize, set_hashsize, param_get_uint,
1453 &ip_conntrack_htable_size, 0600);
1455 int __init ip_conntrack_init(void)
1460 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1461 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1462 if (!ip_conntrack_htable_size) {
1463 ip_conntrack_htable_size
1464 = (((num_physpages << PAGE_SHIFT) / 16384)
1465 / sizeof(struct list_head));
1466 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1467 ip_conntrack_htable_size = 8192;
1468 if (ip_conntrack_htable_size < 16)
1469 ip_conntrack_htable_size = 16;
1471 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1473 printk("ip_conntrack version %s (%u buckets, %d max)"
1474 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1475 ip_conntrack_htable_size, ip_conntrack_max,
1476 sizeof(struct ip_conntrack));
1478 ret = nf_register_sockopt(&so_getorigdst);
1480 printk(KERN_ERR "Unable to register netfilter socket option\n");
1484 ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
1485 &ip_conntrack_vmalloc);
1486 if (!ip_conntrack_hash) {
1487 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1488 goto err_unreg_sockopt;
1491 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1492 sizeof(struct ip_conntrack), 0,
1494 if (!ip_conntrack_cachep) {
1495 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1499 ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
1500 sizeof(struct ip_conntrack_expect),
1502 if (!ip_conntrack_expect_cachep) {
1503 printk(KERN_ERR "Unable to create ip_expect slab cache\n");
1504 goto err_free_conntrack_slab;
1507 /* Don't NEED lock here, but good form anyway. */
1508 write_lock_bh(&ip_conntrack_lock);
1509 for (i = 0; i < MAX_IP_CT_PROTO; i++)
1510 ip_ct_protos[i] = &ip_conntrack_generic_protocol;
1511 /* Sew in builtin protocols. */
1512 ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
1513 ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
1514 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
1515 write_unlock_bh(&ip_conntrack_lock);
1517 /* For use by ipt_REJECT */
1518 ip_ct_attach = ip_conntrack_attach;
1520 /* Set up fake conntrack:
1521 - to never be deleted, not in any hashes */
1522 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
1524 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
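	/* (The raw table's NOTRACK target attaches ip_conntrack_untracked
	 *  to skbs that should bypass tracking; since the refcount never
	 *  drops to zero, it is never freed.) */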
1528 err_free_conntrack_slab:
1529 kmem_cache_destroy(ip_conntrack_cachep);
1531 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1532 ip_conntrack_htable_size);
1534 nf_unregister_sockopt(&so_getorigdst);