2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the Netfilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Version: $Id: ip_vs_conn.c,v 1.28.2.5 2003/08/09 13:27:08 wensong Exp $
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
19 * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
20 * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
21 * and others. Many code here is taken from IP MASQ code of kernel 2.2.
27 #include <linux/module.h>
28 #include <linux/kernel.h>
29 #include <linux/vmalloc.h>
31 #include <linux/tcp.h> /* for tcphdr */
33 #include <linux/proc_fs.h> /* for proc_net_* */
34 #include <asm/softirq.h> /* for local_bh_* */
36 #include <net/tcp.h> /* for csum_tcpudp_magic */
38 #include <net/icmp.h> /* for icmp_send */
39 #include <net/route.h> /* for ip_route_output */
40 #include <linux/netfilter.h>
41 #include <linux/netfilter_ipv4.h>
42 #include <linux/jhash.h>
43 #include <linux/random.h>
45 #include <net/ip_vs.h>
49 * Connection hash table: for input and output packets lookups of IPVS
/* vmalloc'ed in ip_vs_conn_init(): IP_VS_CONN_TAB_SIZE list heads */
51 static struct list_head *ip_vs_conn_tab;
53 /* SLAB cache for IPVS connections */
54 static kmem_cache_t *ip_vs_conn_cachep;
56 /* counter for current IPVS connections */
57 static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
59 /* counter for no-client-port connections */
/* non-zero => ip_vs_conn_in_get() retries lookups with cport==0 */
60 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
62 /* random value for IPVS connection hash */
/* seeded once with get_random_bytes() in ip_vs_conn_init() */
63 static unsigned int ip_vs_conn_rnd;
66 * Fine locking granularity for big connection hash table
/* 2^4 = 16 rwlocks; buckets map to locks by (key & CT_LOCKARRAY_MASK) */
68 #define CT_LOCKARRAY_BITS 4
69 #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
70 #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
/* rwlock padded to a cache line to avoid false sharing between CPUs */
/* NOTE(review): the rwlock member declaration is elided in this view;
   the ct_*_lock helpers below access it as `.l` — presumably `rwlock_t l;` */
72 struct ip_vs_aligned_lock
75 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
77 /* lock array for conn table */
78 struct ip_vs_aligned_lock
79 __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
81 static inline void ct_read_lock(unsigned key)
83 read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
86 static inline void ct_read_unlock(unsigned key)
88 read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
91 static inline void ct_write_lock(unsigned key)
93 write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
96 static inline void ct_write_unlock(unsigned key)
98 write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
101 static inline void ct_read_lock_bh(unsigned key)
103 read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
106 static inline void ct_read_unlock_bh(unsigned key)
108 read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
111 static inline void ct_write_lock_bh(unsigned key)
113 write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
116 static inline void ct_write_unlock_bh(unsigned key)
118 write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
123 * Returns hash value for IPVS connection entry
126 ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port)
128 return jhash_3words(addr, port, proto, ip_vs_conn_rnd)
129 & IP_VS_CONN_TAB_MASK;
134 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
135 * returns bool success.
137 static int ip_vs_conn_hash(struct ip_vs_conn *cp)
142 /* Hash by protocol, client address and port */
143 hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
147 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
148 list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
149 cp->flags |= IP_VS_CONN_F_HASHED;
150 atomic_inc(&cp->refcnt);
153 IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
154 "called from %p\n", __builtin_return_address(0));
158 ct_write_unlock(hash);
165 * UNhashes ip_vs_conn from ip_vs_conn_tab.
166 * returns bool success.
168 static int ip_vs_conn_unhash(struct ip_vs_conn *cp)
173 /* unhash it and decrease its reference counter */
174 hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
177 if (cp->flags & IP_VS_CONN_F_HASHED) {
178 list_del(&cp->c_list);
179 cp->flags &= ~IP_VS_CONN_F_HASHED;
180 atomic_dec(&cp->refcnt);
185 ct_write_unlock(hash);
192 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
193 * Called for pkts coming from OUTside-to-INside.
194 * s_addr, s_port: pkt source address (foreign host)
195 * d_addr, d_port: pkt dest address (load balancer)
197 static inline struct ip_vs_conn *__ip_vs_conn_in_get
198 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
201 struct ip_vs_conn *cp;
202 struct list_head *l,*e;
204 hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
205 l = &ip_vs_conn_tab[hash];
209 for (e=l->next; e!=l; e=e->next) {
210 cp = list_entry(e, struct ip_vs_conn, c_list);
211 if (s_addr==cp->caddr && s_port==cp->cport &&
212 d_port==cp->vport && d_addr==cp->vaddr &&
213 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
214 protocol==cp->protocol) {
216 atomic_inc(&cp->refcnt);
217 ct_read_unlock(hash);
222 ct_read_unlock(hash);
227 struct ip_vs_conn *ip_vs_conn_in_get
228 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
230 struct ip_vs_conn *cp;
232 cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
233 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
234 cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
236 IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
237 ip_vs_proto_name(protocol),
238 NIPQUAD(s_addr), ntohs(s_port),
239 NIPQUAD(d_addr), ntohs(d_port),
245 /* Get reference to connection template */
246 struct ip_vs_conn *ip_vs_ct_in_get
247 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
250 struct ip_vs_conn *cp;
252 hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
256 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
257 if (s_addr==cp->caddr && s_port==cp->cport &&
258 d_port==cp->vport && d_addr==cp->vaddr &&
259 cp->flags & IP_VS_CONN_F_TEMPLATE &&
260 protocol==cp->protocol) {
262 atomic_inc(&cp->refcnt);
269 ct_read_unlock(hash);
271 IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
272 ip_vs_proto_name(protocol),
273 NIPQUAD(s_addr), ntohs(s_port),
274 NIPQUAD(d_addr), ntohs(d_port),
281 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
282 * Called for pkts coming from inside-to-OUTside.
283 * s_addr, s_port: pkt source address (inside host)
284 * d_addr, d_port: pkt dest address (foreign host)
286 struct ip_vs_conn *ip_vs_conn_out_get
287 (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
290 struct ip_vs_conn *cp, *ret=NULL;
291 struct list_head *l,*e;
294 * Check for "full" addressed entries
296 hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
297 l = &ip_vs_conn_tab[hash];
301 for (e=l->next; e!=l; e=e->next) {
302 cp = list_entry(e, struct ip_vs_conn, c_list);
303 if (d_addr == cp->caddr && d_port == cp->cport &&
304 s_port == cp->dport && s_addr == cp->daddr &&
305 protocol == cp->protocol) {
307 atomic_inc(&cp->refcnt);
313 ct_read_unlock(hash);
315 IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
316 ip_vs_proto_name(protocol),
317 NIPQUAD(s_addr), ntohs(s_port),
318 NIPQUAD(d_addr), ntohs(d_port),
319 ret?"hit":"not hit");
326 * Put back the conn and restart its timer with its timeout
328 void ip_vs_conn_put(struct ip_vs_conn *cp)
330 /* reset it expire in its timeout */
331 mod_timer(&cp->timer, jiffies+cp->timeout);
333 __ip_vs_conn_put(cp);
338 * Timeout table[state]
/* Default per-state expiry periods, used when a connection has no
   VS-specific timeout table attached (see vs_set_state_timeout). */
340 struct ip_vs_timeout_table vs_timeout_table = {
341 ATOMIC_INIT(0), /* refcnt */
/* NOTE(review): the scale field and the timeout[] array opener appear
   elided in this view. */
344 [IP_VS_S_NONE] = 30*60*HZ,
345 [IP_VS_S_ESTABLISHED] = 15*60*HZ,
346 [IP_VS_S_SYN_SENT] = 2*60*HZ,
347 [IP_VS_S_SYN_RECV] = 1*60*HZ,
348 [IP_VS_S_FIN_WAIT] = 2*60*HZ,
349 [IP_VS_S_TIME_WAIT] = 2*60*HZ,
350 [IP_VS_S_CLOSE] = 10*HZ,
351 [IP_VS_S_CLOSE_WAIT] = 60*HZ,
352 [IP_VS_S_LAST_ACK] = 30*HZ,
353 [IP_VS_S_LISTEN] = 2*60*HZ,
354 [IP_VS_S_SYNACK] = 120*HZ,
355 [IP_VS_S_UDP] = 5*60*HZ,
356 [IP_VS_S_ICMP] = 1*60*HZ,
357 [IP_VS_S_LAST] = 2*HZ,
/* Shortened timeouts installed by ip_vs_secure_tcp_set(1) to shed
   state faster while under flood attack. */
362 struct ip_vs_timeout_table vs_timeout_table_dos = {
363 ATOMIC_INIT(0), /* refcnt */
366 [IP_VS_S_NONE] = 15*60*HZ,
367 [IP_VS_S_ESTABLISHED] = 8*60*HZ,
368 [IP_VS_S_SYN_SENT] = 60*HZ,
369 [IP_VS_S_SYN_RECV] = 10*HZ,
370 [IP_VS_S_FIN_WAIT] = 60*HZ,
371 [IP_VS_S_TIME_WAIT] = 60*HZ,
372 [IP_VS_S_CLOSE] = 10*HZ,
373 [IP_VS_S_CLOSE_WAIT] = 60*HZ,
374 [IP_VS_S_LAST_ACK] = 30*HZ,
375 [IP_VS_S_LISTEN] = 2*60*HZ,
376 [IP_VS_S_SYNACK] = 100*HZ,
377 [IP_VS_S_UDP] = 3*60*HZ,
378 [IP_VS_S_ICMP] = 1*60*HZ,
379 [IP_VS_S_LAST] = 2*HZ,
385 * Timeout table to use for the VS entries
386 * If NULL we use the default table (vs_timeout_table).
387 * Under flood attack we switch to vs_timeout_table_dos
/* attached to each new connection in ip_vs_conn_new(); switched by
   ip_vs_secure_tcp_set() */
390 static struct ip_vs_timeout_table *ip_vs_timeout_table = &vs_timeout_table;
/* Human-readable state names indexed by IP_VS_S_* value; looked up by
   ip_vs_state_name().  IP_VS_S_LAST maps to "BUG!" as a sentinel. */
392 static const char * state_name_table[IP_VS_S_LAST+1] = {
393 [IP_VS_S_NONE] = "NONE",
394 [IP_VS_S_ESTABLISHED] = "ESTABLISHED",
395 [IP_VS_S_SYN_SENT] = "SYN_SENT",
396 [IP_VS_S_SYN_RECV] = "SYN_RECV",
397 [IP_VS_S_FIN_WAIT] = "FIN_WAIT",
398 [IP_VS_S_TIME_WAIT] = "TIME_WAIT",
399 [IP_VS_S_CLOSE] = "CLOSE",
400 [IP_VS_S_CLOSE_WAIT] = "CLOSE_WAIT",
401 [IP_VS_S_LAST_ACK] = "LAST_ACK",
402 [IP_VS_S_LISTEN] = "LISTEN",
403 [IP_VS_S_SYNACK] = "SYNACK",
404 [IP_VS_S_UDP] = "UDP",
405 [IP_VS_S_ICMP] = "ICMP",
406 [IP_VS_S_LAST] = "BUG!",
/* Two-letter mnemonics for the TCP state columns of the transition
   tables below (vs_tcp_states / vs_tcp_states_dos). */
409 #define sNO IP_VS_S_NONE
410 #define sES IP_VS_S_ESTABLISHED
411 #define sSS IP_VS_S_SYN_SENT
412 #define sSR IP_VS_S_SYN_RECV
413 #define sFW IP_VS_S_FIN_WAIT
414 #define sTW IP_VS_S_TIME_WAIT
415 #define sCL IP_VS_S_CLOSE
416 #define sCW IP_VS_S_CLOSE_WAIT
417 #define sLA IP_VS_S_LAST_ACK
418 #define sLI IP_VS_S_LISTEN
419 #define sSA IP_VS_S_SYNACK
/* One row of a transition table: next state indexed by current state. */
421 struct vs_tcp_states_t {
422 int next_state[IP_VS_S_LAST]; /* should be _LAST_TCP */
425 const char * ip_vs_state_name(int state)
427 if (state >= IP_VS_S_LAST)
429 return state_name_table[state] ? state_name_table[state] : "?";
/* Normal-mode TCP transition table.  Three groups of four rows
   (per vs_tcp_state_idx: [0-3] input, [4-7] output, [8-11] input-only);
   rows are the syn/fin/ack/rst events, columns the current state. */
432 static struct vs_tcp_states_t vs_tcp_states [] = {
434 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
435 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
436 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
437 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
438 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
441 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
442 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
443 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
444 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
445 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
448 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
449 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
450 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
451 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
452 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
/* DoS-mode TCP transition table (secure_tcp): same layout as
   vs_tcp_states but keeps half-open connections in sSA/sSR longer
   paths that are cheaper to drop.  Installed by ip_vs_secure_tcp_set(). */
455 static struct vs_tcp_states_t vs_tcp_states_dos [] = {
457 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
458 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
459 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
460 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
461 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
464 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
465 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
466 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
467 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
468 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
471 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
472 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
473 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
474 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
475 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
/* currently active transition table; switched by ip_vs_secure_tcp_set() */
478 static struct vs_tcp_states_t *ip_vs_state_table = vs_tcp_states;
480 void ip_vs_secure_tcp_set(int on)
483 ip_vs_state_table = vs_tcp_states_dos;
484 ip_vs_timeout_table = &vs_timeout_table_dos;
486 ip_vs_state_table = vs_tcp_states;
487 ip_vs_timeout_table = &vs_timeout_table;
/* Map the TCP flags in *th plus the direction offset to a row index
   into the active transition table; the caller treats a negative
   return as "no recognized flag" (see vs_tcp_state()).
   NOTE(review): the function body is elided in this view — behavior
   inferred from the caller and the comment below; confirm upstream. */
492 static inline int vs_tcp_state_idx(struct tcphdr *th, int state_off)
495 * [0-3]: input states, [4-7]: output, [8-11] input only states.
509 static inline int vs_set_state_timeout(struct ip_vs_conn *cp, int state)
511 struct ip_vs_timeout_table *vstim = cp->timeout_table;
514 * Use default timeout table if no specific for this entry
517 vstim = &vs_timeout_table;
519 cp->timeout = vstim->timeout[cp->state=state];
522 int scale = vstim->scale;
525 cp->timeout >>= -scale;
527 cp->timeout <<= scale;
/* Advance a TCP connection's IPVS state machine for one packet:
   classify the packet's flags+direction into a table row, look up the
   next state, keep the destination's active/inactive counters in sync,
   and install the new state's timeout. */
535 vs_tcp_state(struct ip_vs_conn *cp, int state_off, struct tcphdr *th)
538 int new_state = IP_VS_S_CLOSE;
541 * Update state offset to INPUT_ONLY if necessary
542 * or delete NO_OUTPUT flag if output packet detected
544 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
545 if (state_off == VS_STATE_OUTPUT)
546 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
548 state_off = VS_STATE_INPUT_ONLY;
/* negative index means no recognized TCP flag; fall through with
   new_state == IP_VS_S_CLOSE */
551 if ((state_idx = vs_tcp_state_idx(th, state_off)) < 0) {
552 IP_VS_DBG(8, "vs_tcp_state_idx(%d)=%d!!!\n",
553 state_off, state_idx);
557 new_state = ip_vs_state_table[state_idx].next_state[cp->state];
560 if (new_state != cp->state) {
561 struct ip_vs_dest *dest = cp->dest;
563 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
564 "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
565 ip_vs_proto_name(cp->protocol),
566 (state_off==VS_STATE_OUTPUT)?"output ":"input ",
571 NIPQUAD(cp->daddr), ntohs(cp->dport),
572 NIPQUAD(cp->caddr), ntohs(cp->cport),
573 ip_vs_state_name(cp->state),
574 ip_vs_state_name(new_state),
575 atomic_read(&cp->refcnt));
/* keep dest->activeconns/inactconns consistent with whether the
   connection is ESTABLISHED.
   NOTE(review): a NULL check on dest appears elided in this view —
   confirm the guard exists before these counter updates. */
577 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
578 (new_state != IP_VS_S_ESTABLISHED)) {
579 atomic_dec(&dest->activeconns);
580 atomic_inc(&dest->inactconns);
581 cp->flags |= IP_VS_CONN_F_INACTIVE;
582 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
583 (new_state == IP_VS_S_ESTABLISHED)) {
584 atomic_inc(&dest->activeconns);
585 atomic_dec(&dest->inactconns);
586 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
/* store the new state and (scaled) timeout; returns the state */
591 return vs_set_state_timeout(cp, new_state);
596 * Handle state transitions
598 int ip_vs_set_state(struct ip_vs_conn *cp,
599 int state_off, struct iphdr *iph, void *tp)
603 spin_lock(&cp->lock);
604 switch (iph->protocol) {
606 ret = vs_tcp_state(cp, state_off, tp);
609 ret = vs_set_state_timeout(cp, IP_VS_S_UDP);
612 ret = vs_set_state_timeout(cp, IP_VS_S_ICMP);
617 spin_unlock(&cp->lock);
624 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
626 int ip_vs_conn_listen(struct ip_vs_conn *cp)
628 vs_set_state_timeout(cp, IP_VS_S_LISTEN);
635 * Let packets bypass the destination when the destination is not
636 * available, it may be only used in transparent cache cluster.
/* Route the packet straight to its original destination (no rewriting)
   and re-inject it on LOCAL_OUT.  Returns an NF_* verdict via the
   elided exit paths. */
638 static int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
640 struct rtable *rt; /* Route to the other host */
641 struct iphdr *iph = skb->nh.iph;
/* look up a route to the packet's own destination address */
647 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) {
648 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
649 "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
/* honor path MTU: refuse to forward DF packets that do not fit */
654 mtu = rt->u.dst.pmtu;
655 if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
657 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
658 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
662 /* update checksum because skb might be defragmented */
/* make room for the output device's hard header if needed */
665 if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
666 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
668 IP_VS_ERR_RL("ip_vs_bypass_xmit(): no memory\n");
/* replace the old route with the freshly looked-up one */
674 dst_release(skb->dst);
675 skb->dst = &rt->u.dst;
677 #ifdef CONFIG_NETFILTER_DEBUG
678 skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
679 #endif /* CONFIG_NETFILTER_DEBUG */
/* mark so IPVS hooks skip this skb on its way out */
680 skb->nfcache |= NFC_IPVS_PROPERTY;
/* error path: report link failure to the upper layer */
687 dst_link_failure(skb);
695 * NULL transmitter (do nothing except return NF_ACCEPT)
697 static int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
704 * NAT transmitter (only for outside-to-inside nat forwarding)
/* Rewrite destination address/port to the real server (cp->daddr/
   cp->dport), fix checksums (incremental when no app helper, full
   otherwise), and re-inject on LOCAL_OUT. */
706 static int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
708 struct rtable *rt; /* Route to the other host */
718 * If it has ip_vs_app helper, the helper may change the payload,
719 * so it needs full checksum checking and checksum calculation.
720 * If not, only the header (such as IP address and port number)
721 * will be changed, so it is fast to do incremental checksum update,
722 * and let the destination host do final checksum checking.
/* app helpers need linear data to inspect/modify the payload */
725 if (cp->app && skb_is_nonlinear(skb)
726 && skb_linearize(skb, GFP_ATOMIC) != 0)
731 h.raw = (char*) iph + ihl;
732 size = ntohs(iph->tot_len) - ihl;
734 /* do TCP/UDP checksum checking if it has application helper */
/* UDP checksum 0 means "no checksum" — skip verification then */
735 if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
736 switch (skb->ip_summed) {
738 skb->csum = csum_partial(h.raw, size, 0);
741 if (csum_tcpudp_magic(iph->saddr, iph->daddr, size,
742 iph->protocol, skb->csum)) {
743 IP_VS_DBG_RL("Incoming failed %s checksum "
744 "from %d.%d.%d.%d (size=%d)!\n",
745 ip_vs_proto_name(iph->protocol),
752 /* CHECKSUM_UNNECESSARY */
758 * Check if it is no_cport connection ...
/* first packet of a no-cport connection: learn the client port and
   rehash the entry under it */
760 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
761 if (ip_vs_conn_unhash(cp)) {
762 spin_lock(&cp->lock);
763 if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
764 atomic_dec(&ip_vs_conn_no_cport_cnt);
765 cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
766 cp->cport = h.portp[0];
/* NOTE(review): this debug message prints cp->dport, but the value
   just filled in above is cp->cport — likely should be
   ntohs(cp->cport). */
767 IP_VS_DBG(10, "filled cport=%d\n", ntohs(cp->dport));
769 spin_unlock(&cp->lock);
771 /* hash on new dport */
776 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
/* honor path MTU before mangling */
780 mtu = rt->u.dst.pmtu;
781 if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
783 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
784 IP_VS_DBG_RL("ip_vs_nat_xmit(): frag needed\n");
789 dst_release(skb->dst);
790 skb->dst = &rt->u.dst;
792 /* copy-on-write the packet before mangling it */
793 if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len, &iph, &h.raw))
796 /* mangle the packet */
797 iph->daddr = cp->daddr;
798 h.portp[1] = cp->dport;
801 * Attempt ip_vs_app call.
802 * will fix ip_vs_conn and iph ack_seq stuff
804 if (ip_vs_app_pkt_in(cp, skb) != 0) {
805 /* skb data has probably changed, update pointers */
807 h.raw = (char*) iph + ihl;
808 size = skb->len - ihl;
812 * Adjust TCP/UDP checksums
814 if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
815 /* Only port and addr are changed, do fast csum update */
816 ip_vs_fast_check_update(&h, cp->vaddr, cp->daddr,
817 cp->vport, cp->dport, iph->protocol);
818 if (skb->ip_summed == CHECKSUM_HW)
819 skb->ip_summed = CHECKSUM_NONE;
821 /* full checksum calculation */
822 switch (iph->protocol) {
825 h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
827 csum_partial(h.raw, size, 0));
831 h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
833 csum_partial(h.raw, size, 0));
/* UDP: a computed checksum of 0 must be sent as all-ones */
834 if (h.uh->check == 0)
835 h.uh->check = 0xFFFF;
838 skb->ip_summed = CHECKSUM_UNNECESSARY;
842 IP_VS_DBG(10, "NAT to %u.%u.%u.%u:%d\n",
843 NIPQUAD(iph->daddr), ntohs(h.portp[1]));
845 /* FIXME: when application helper enlarges the packet and the length
846 is larger than the MTU of outgoing device, there will be still
849 #ifdef CONFIG_NETFILTER_DEBUG
850 skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
851 #endif /* CONFIG_NETFILTER_DEBUG */
852 skb->nfcache |= NFC_IPVS_PROPERTY;
/* error path */
859 dst_link_failure(skb);
867 * IP Tunneling transmitter
869 * This function encapsulates the packet in a new IP packet, its
870 * destination will be set to cp->daddr. Most code of this function
871 * is taken from ipip.c.
873 * It is used in VS/TUN cluster. The load balancer selects a real
874 * server from a cluster based on a scheduling algorithm,
875 * encapsulates the request packet and forwards it to the selected
876 * server. For example, all real servers are configured with
877 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
878 * the encapsulated packet, it will decapsulate the packet, processe
879 * the request and return the response packets directly to the client
880 * without passing the load balancer. This can greatly increase the
881 * scalability of virtual server.
883 static int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
885 struct rtable *rt; /* Route to the other host */
886 struct net_device *tdev; /* Device to other host */
887 struct iphdr *old_iph = skb->nh.iph;
888 u8 tos = old_iph->tos;
889 u16 df = old_iph->frag_off;
890 struct iphdr *iph; /* Our new IP header */
891 int max_headroom; /* The extra header space needed */
/* only IPv4 payloads can be IPIP-encapsulated */
896 if (skb->protocol != __constant_htons(ETH_P_IP)) {
897 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
898 "ETH_P_IP: %d, skb protocol: %d\n",
899 __constant_htons(ETH_P_IP), skb->protocol);
903 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
906 tdev = rt->u.dst.dev;
/* inner MTU = path MTU minus the outer IP header we will prepend */
908 mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
911 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
/* propagate the reduced MTU to the cached route */
914 if (skb->dst && mtu < skb->dst->pmtu)
915 skb->dst->pmtu = mtu;
917 df |= (old_iph->frag_off&__constant_htons(IP_DF));
919 if ((old_iph->frag_off&__constant_htons(IP_DF))
920 && mtu < ntohs(old_iph->tot_len)) {
921 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
923 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
927 /* update checksum because skb might be defragmented */
928 ip_send_check(old_iph);
931 * Okay, now see if we can stuff it in the buffer as-is.
/* headroom for the device hard header (16-byte aligned) + outer iphdr */
933 max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
935 if (skb_headroom(skb) < max_headroom
936 || skb_cloned(skb) || skb_shared(skb)) {
937 struct sk_buff *new_skb =
938 skb_realloc_headroom(skb, max_headroom);
941 IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
946 old_iph = skb->nh.iph;
/* inner header becomes transport header; push the outer IP header */
949 skb->h.raw = skb->nh.raw;
950 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
951 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
954 dst_release(skb->dst);
955 skb->dst = &rt->u.dst;
958 * Push down and install the IPIP header.
962 iph->ihl = sizeof(struct iphdr)>>2;
964 iph->protocol = IPPROTO_IPIP;
966 iph->daddr = rt->rt_dst;
967 iph->saddr = rt->rt_src;
968 iph->ttl = old_iph->ttl;
969 iph->tot_len = htons(skb->len);
970 ip_select_ident(iph, &rt->u.dst, NULL);
973 skb->ip_summed = CHECKSUM_NONE;
974 #ifdef CONFIG_NETFILTER_DEBUG
975 skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
976 #endif /* CONFIG_NETFILTER_DEBUG */
977 skb->nfcache |= NFC_IPVS_PROPERTY;
/* error path */
985 dst_link_failure(skb);
993 * Direct Routing transmitter
/* Forward the packet unmodified to the real server's MAC (route via
   cp->dest); the real server answers the client directly. */
995 static int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
997 struct rtable *rt; /* Route to the other host */
998 struct iphdr *iph = skb->nh.iph;
1003 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
/* honor path MTU: refuse DF packets that do not fit */
1007 mtu = rt->u.dst.pmtu;
1008 if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
1009 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1011 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
1015 /* update checksum because skb might be defragmented */
1018 if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
1019 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
1021 IP_VS_ERR_RL("ip_vs_dr_xmit(): no memory\n");
1026 /* drop old route */
1027 dst_release(skb->dst);
1028 skb->dst = &rt->u.dst;
1030 #ifdef CONFIG_NETFILTER_DEBUG
1031 skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
1032 #endif /* CONFIG_NETFILTER_DEBUG */
1033 skb->nfcache |= NFC_IPVS_PROPERTY;
/* hand the skb back to netfilter on the LOCAL_OUT hook */
1037 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
/* error path */
1044 dst_link_failure(skb);
1052 * Bind a connection entry with the corresponding packet_xmit.
1053 * Called by ip_vs_conn_new.
1055 static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
1057 switch (IP_VS_FWD_METHOD(cp)) {
1058 case IP_VS_CONN_F_MASQ:
1059 cp->packet_xmit = ip_vs_nat_xmit;
1062 case IP_VS_CONN_F_TUNNEL:
1063 cp->packet_xmit = ip_vs_tunnel_xmit;
1066 case IP_VS_CONN_F_DROUTE:
1067 cp->packet_xmit = ip_vs_dr_xmit;
1070 case IP_VS_CONN_F_LOCALNODE:
1071 cp->packet_xmit = ip_vs_null_xmit;
1074 case IP_VS_CONN_F_BYPASS:
1075 cp->packet_xmit = ip_vs_bypass_xmit;
1082 * Bind a connection entry with a virtual service destination
1083 * Called just after a new connection entry is created.
1086 ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
1088 /* if dest is NULL, then return directly */
1092 /* Increase the refcnt counter of the dest */
1093 atomic_inc(&dest->refcnt)
1095 /* Bind with the destination and its corresponding transmitter */
/* fold the service-configured forwarding flags into the connection.
   NOTE(review): the `cp->dest = dest;` assignment appears elided in
   this view — the binding is incomplete without it; confirm upstream. */
1096 cp->flags |= atomic_read(&dest->conn_flags);
1099 IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
1100 "d:%u.%u.%u.%u:%d fwd:%c s:%s flg:%X cnt:%d destcnt:%d\n",
1101 ip_vs_proto_name(cp->protocol),
1102 NIPQUAD(cp->caddr), ntohs(cp->cport),
1103 NIPQUAD(cp->vaddr), ntohs(cp->vport),
1104 NIPQUAD(cp->daddr), ntohs(cp->dport),
1105 ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
1106 cp->flags, atomic_read(&cp->refcnt),
1107 atomic_read(&dest->refcnt));
1112 * Unbind a connection entry with its VS destination
1113 * Called by the ip_vs_conn_expire function.
1115 static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
1117 struct ip_vs_dest *dest = cp->dest;
1119 /* if dest is NULL, then return directly */
1123 IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d "
1124 "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d fwd:%c "
1125 "s:%s flg:%X cnt:%d destcnt:%d\n",
1126 ip_vs_proto_name(cp->protocol),
1127 NIPQUAD(cp->caddr), ntohs(cp->cport),
1128 NIPQUAD(cp->vaddr), ntohs(cp->vport),
1129 NIPQUAD(cp->daddr), ntohs(cp->dport),
1130 ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
1131 cp->flags, atomic_read(&cp->refcnt),
1132 atomic_read(&dest->refcnt));
1135 * Decrease the inactconns or activeconns counter
1136 * if it is not a connection template
1138 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
1139 if (cp->flags & IP_VS_CONN_F_INACTIVE) {
1140 atomic_dec(&dest->inactconns);
1142 atomic_dec(&dest->activeconns);
1147 * Simply decrease the refcnt of the dest, because the
1148 * dest will be either in service's destination list
/* ...or in the trash list, so no free here */
1151 atomic_dec(&dest->refcnt);
1156 * Checking if the destination of a connection template is available.
1157 * If available, return 1, otherwise invalidate this connection
1158 * template and return 0.
1160 int ip_vs_check_template(struct ip_vs_conn *ct)
1162 struct ip_vs_dest *dest = ct->dest;
1165 * Checking the dest server status.
/* template is unusable if it has no dest, the dest is down, or the
   dest is quiesced (weight 0) with expire_quiescent_template set */
1167 if ((dest == NULL) ||
1168 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
1169 (sysctl_ip_vs_expire_quiescent_template &&
1170 (atomic_read(&dest->weight) == 0))) {
1171 IP_VS_DBG(9, "check_template: dest not available for "
1172 "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
1173 "-> d:%u.%u.%u.%u:%d\n",
1174 ip_vs_proto_name(ct->protocol),
1175 NIPQUAD(ct->caddr), ntohs(ct->cport),
1176 NIPQUAD(ct->vaddr), ntohs(ct->vport),
1177 NIPQUAD(ct->daddr), ntohs(ct->dport));
1180 * Invalidate the connection template
/* 65535 is used as the "invalidated" port sentinel.
   NOTE(review): the assignments writing the sentinel ports between
   unhash and rehash appear elided in this view — confirm upstream. */
1182 if (ct->vport != 65535) {
1183 if (ip_vs_conn_unhash(ct)) {
1187 ip_vs_conn_hash(ct);
1192 * Simply decrease the refcnt of the template,
1193 * don't restart its timer.
1195 atomic_dec(&ct->refcnt);
1203 ip_vs_timeout_attach(struct ip_vs_conn *cp, struct ip_vs_timeout_table *vstim)
1205 atomic_inc(&vstim->refcnt);
1206 cp->timeout_table = vstim;
1209 static inline void ip_vs_timeout_detach(struct ip_vs_conn *cp)
1211 struct ip_vs_timeout_table *vstim = cp->timeout_table;
1215 cp->timeout_table = NULL;
1216 atomic_dec(&vstim->refcnt);
/* Timer callback: tear the connection down if nothing references it
   any more, otherwise re-hash it and retry after a TIME_WAIT period. */
1220 static void ip_vs_conn_expire(unsigned long data)
1222 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
/* next retry interval, should teardown have to be delayed */
1224 if (cp->timeout_table)
1225 cp->timeout = cp->timeout_table->timeout[IP_VS_S_TIME_WAIT];
1227 cp->timeout = vs_timeout_table.timeout[IP_VS_S_TIME_WAIT];
/* hold a reference while we operate on the entry */
1232 atomic_inc(&cp->refcnt);
1235 * do I control anybody?
/* still controlling other connections -> cannot die yet */
1237 if (atomic_read(&cp->n_control))
1241 * unhash it if it is hashed in the conn table
1243 if (!ip_vs_conn_unhash(cp))
1247 * refcnt==1 implies I'm the only one referrer
1249 if (likely(atomic_read(&cp->refcnt) == 1)) {
1250 /* make sure that there is no timer on it now */
1251 if (timer_pending(&cp->timer))
1252 del_timer(&cp->timer);
1254 /* does anybody control me? */
1256 ip_vs_control_del(cp);
/* release every resource the entry holds, then free it */
1258 ip_vs_unbind_dest(cp);
1259 ip_vs_unbind_app(cp);
1260 ip_vs_timeout_detach(cp);
1261 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
1262 atomic_dec(&ip_vs_conn_no_cport_cnt);
1263 atomic_dec(&ip_vs_conn_count);
1265 kmem_cache_free(ip_vs_conn_cachep, cp);
/* somebody still holds a reference: postpone the teardown */
1269 /* hash it back to the table */
1270 ip_vs_conn_hash(cp);
1273 IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
1274 atomic_read(&cp->refcnt)-1,
1275 atomic_read(&cp->n_control));
/* Force immediate expiry: fire the connection's timer right now so
   ip_vs_conn_expire() runs at the next timer tick.
   NOTE(review): guard/put lines around mod_timer appear elided in
   this view — confirm upstream. */
1281 void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
1284 mod_timer(&cp->timer, jiffies);
1288 * Create a new connection entry and hash it into the ip_vs_conn_tab.
/* Allocates from the slab cache, initializes timer/locks/counters,
   binds app helper, destination and transmitter, then hashes the
   entry with refcnt 1 held by the caller. */
1291 ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
1292 __u32 daddr, __u16 dport, unsigned flags,
1293 struct ip_vs_dest *dest)
1295 struct ip_vs_conn *cp;
1297 cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
1299 IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
1303 memset(cp, 0, sizeof(*cp));
1304 INIT_LIST_HEAD(&cp->c_list);
/* arm (but do not start) the expiry timer */
1305 init_timer(&cp->timer);
1306 cp->timer.data = (unsigned long)cp;
1307 cp->timer.function = ip_vs_conn_expire;
1308 ip_vs_timeout_attach(cp, ip_vs_timeout_table);
1309 cp->protocol = proto;
/* NOTE(review): the caddr/cport/vaddr/vport/daddr/dport assignments
   appear elided in this view. */
1317 cp->app_data = NULL;
1319 cp->lock = SPIN_LOCK_UNLOCKED;
1321 atomic_set(&cp->n_control, 0);
1322 atomic_set(&cp->in_pkts, 0);
1324 atomic_inc(&ip_vs_conn_count);
1325 if (flags & IP_VS_CONN_F_NO_CPORT)
1326 atomic_inc(&ip_vs_conn_no_cport_cnt);
1328 /* Bind its application helper (only for VS/NAT) if any */
1331 /* Bind the connection with a destination server */
1332 ip_vs_bind_dest(cp, dest);
1334 /* Set its state and timeout */
1335 vs_set_state_timeout(cp, IP_VS_S_NONE);
1337 /* Bind its packet transmitter */
1338 ip_vs_bind_xmit(cp);
1341 * Set the entry is referenced by the current thread before hashing
1342 * it in the table, so that other thread run ip_vs_random_dropentry
1343 * but cannot drop this entry.
1345 atomic_set(&cp->refcnt, 1);
1347 /* Hash it in the ip_vs_conn_tab finally */
1348 ip_vs_conn_hash(cp);
1355 * /proc/net/ip_vs_conn entries
/* Old-style procfs read handler: walks every hash bucket and prints
   one fixed-width line per connection, honoring offset/length. */
1358 ip_vs_conn_getinfo(char *buffer, char **start, off_t offset, int length)
1363 struct ip_vs_conn *cp;
1364 struct list_head *l, *e;
1368 len += sprintf(buffer+len, "%-127s\n",
1369 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires");
1372 for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
1374 * Lock is actually only need in next loop
1375 * we are called from uspace: must stop bh.
1377 ct_read_lock_bh(idx);
1379 l = &ip_vs_conn_tab[idx];
1380 for (e=l->next; e!=l; e=e->next) {
1381 cp = list_entry(e, struct ip_vs_conn, c_list);
1386 "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu",
1387 ip_vs_proto_name(cp->protocol),
1388 ntohl(cp->caddr), ntohs(cp->cport),
1389 ntohl(cp->vaddr), ntohs(cp->vport),
1390 ntohl(cp->daddr), ntohs(cp->dport),
1391 ip_vs_state_name(cp->state),
1392 (cp->timer.expires-jiffies)/HZ);
1393 len += sprintf(buffer+len, "%-127s\n", temp);
/* stop as soon as the requested window is filled */
1394 if (pos >= offset+length) {
1395 ct_read_unlock_bh(idx);
1399 ct_read_unlock_bh(idx);
1403 *start = buffer+len-(pos-offset); /* Start of wanted data */
1414 * Randomly drop connection entries before running out of memory
1416 static inline int todrop_entry(struct ip_vs_conn *cp)
1419 * The drop rate array needs tuning for real environments.
1420 * Called from timer bh only => no locking
1422 static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
1423 static char todrop_counter[9] = {0};
1426 /* if the conn entry hasn't lasted for 60 seconds, don't drop it.
1427 This will leave enough time for normal connection to get
1429 if (cp->timeout+jiffies-cp->timer.expires < 60*HZ)
1432 /* Don't drop the entry if its number of incoming packets is not
1433 located in [0, 8] */
1434 i = atomic_read(&cp->in_pkts);
1435 if (i > 8 || i < 0) return 0;
1437 if (!todrop_rate[i]) return 0;
1438 if (--todrop_counter[i] > 0) return 0;
1440 todrop_counter[i] = todrop_rate[i];
/* Memory-pressure defense: scan a random 1/32 of the table and expire
   droppable entries (and their controlling templates) immediately. */
1445 void ip_vs_random_dropentry(void)
1448 struct ip_vs_conn *cp;
1449 struct list_head *l,*e;
1452 * Randomly scan 1/32 of the whole table every second
1454 for (idx=0; idx<(IP_VS_CONN_TAB_SIZE>>5); idx++) {
1455 unsigned hash = net_random()&IP_VS_CONN_TAB_MASK;
1458 * Lock is actually needed in this loop.
1460 ct_write_lock(hash);
1462 l = &ip_vs_conn_tab[hash];
1463 for (e=l->next; e!=l; e=e->next) {
1464 cp = list_entry(e, struct ip_vs_conn, c_list);
1465 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1466 /* connection template */
/* half-open states are always drop candidates; ESTABLISHED only
   when todrop_entry() says so */
1469 case IP_VS_S_SYN_RECV:
1470 case IP_VS_S_SYNACK:
1473 case IP_VS_S_ESTABLISHED:
1475 if (todrop_entry(cp))
1483 IP_VS_DBG(4, "del connection\n");
1484 ip_vs_conn_expire_now(cp);
/* also expire the controlling template, if any */
1486 IP_VS_DBG(4, "del conn template\n");
1487 ip_vs_conn_expire_now(cp->control);
1490 ct_write_unlock(hash);
1496 * Flush all the connection entries in the ip_vs_conn_tab
/* Used at module cleanup: expire every entry (and its controlling
   template) in every bucket. */
1498 static void ip_vs_conn_flush(void)
1501 struct ip_vs_conn *cp;
1502 struct list_head *l,*e;
1505 for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
1507 * Lock is actually needed in this loop.
1509 ct_write_lock_bh(idx);
1511 l = &ip_vs_conn_tab[idx];
1512 for (e=l->next; e!=l; e=e->next) {
1513 cp = list_entry(e, struct ip_vs_conn, c_list);
1515 IP_VS_DBG(4, "del connection\n");
1516 ip_vs_conn_expire_now(cp);
1518 IP_VS_DBG(4, "del conn template\n");
1519 ip_vs_conn_expire_now(cp->control);
1522 ct_write_unlock_bh(idx);
1525 /* the counter may be not NULL, because maybe some conn entries
1526 are run by slow timer handler or unhashed but still referred */
/* if entries remain, the (elided) code re-runs the flush pass */
1527 if (atomic_read(&ip_vs_conn_count) != 0) {
/* Module init: allocate the hash table, init bucket heads and lock
   array, create the slab cache, register the /proc entry and seed
   the hash random value.  Returns 0 or a negative errno. */
1534 int ip_vs_conn_init(void)
1539 * Allocate the connection hash table and initialize its list heads
1541 ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
1542 if (!ip_vs_conn_tab)
1545 IP_VS_INFO("Connection hash table configured "
1546 "(size=%d, memory=%ldKbytes)\n",
1547 IP_VS_CONN_TAB_SIZE,
1548 (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
1549 IP_VS_DBG(0, "Each connection entry needs %d bytes at least\n",
1550 sizeof(struct ip_vs_conn));
1552 for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
1553 INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
1556 for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
1557 __ip_vs_conntbl_lock_array[idx].l = RW_LOCK_UNLOCKED;
1560 /* Allocate ip_vs_conn slab cache */
1561 ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
1562 sizeof(struct ip_vs_conn), 0,
1563 SLAB_HWCACHE_ALIGN, NULL, NULL);
1564 if (!ip_vs_conn_cachep) {
/* undo the table allocation on cache-creation failure */
1565 vfree(ip_vs_conn_tab);
1569 proc_net_create("ip_vs_conn", 0, ip_vs_conn_getinfo);
1571 /* calculate the random value for connection hash */
1572 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
1577 void ip_vs_conn_cleanup(void)
1579 /* flush all the connection entries first */
1582 /* Release the empty cache */
1583 kmem_cache_destroy(ip_vs_conn_cachep);
1584 proc_net_remove("ip_vs_conn");
1585 vfree(ip_vs_conn_tab);