2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Version: $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/types.h>
27 #include <linux/sysctl.h>
28 #include <linux/proc_fs.h>
29 #include <linux/timer.h>
30 #include <linux/swap.h>
31 #include <linux/proc_fs.h>
33 #include <linux/netfilter.h>
34 #include <linux/netfilter_ipv4.h>
39 #include <asm/uaccess.h>
41 #include <net/ip_vs.h>
/*
 * NOTE(review): the leading integer on each line below is an artifact of the
 * extraction (original file line number fused into the text), not a C token.
 * File-scope state for the IPVS control interface: serialization primitives,
 * defense-strategy counters, and sysctl-backed tunables.
 */
43 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
44 static DECLARE_MUTEX(__ip_vs_mutex);
46 /* lock for service table */
/* exported (non-static): also taken by readers outside this file */
47 rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
49 /* lock for table with the real services */
50 static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
52 /* lock for state and timeout tables */
53 static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
55 /* lock for drop entry handling */
56 static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
58 /* lock for drop packet handling */
59 static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
61 /* 1/rate drop and drop-entry variables */
/* written under __ip_vs_droppacket_lock by update_defense_level() */
62 int ip_vs_drop_rate = 0;
63 int ip_vs_drop_counter = 0;
/* flag read by the defense timer: nonzero => randomly drop conn entries */
64 atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
66 /* number of virtual services */
67 static int ip_vs_num_services = 0;
69 /* sysctl variables */
70 static int sysctl_ip_vs_drop_entry = 0;
71 static int sysctl_ip_vs_drop_packet = 0;
72 static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold in pages; compared against freeram+bufferram */
73 static int sysctl_ip_vs_amemthresh = 2048;
74 static int sysctl_ip_vs_am_droprate = 10;
/* non-static: referenced from other IPVS translation units */
75 int sysctl_ip_vs_cache_bypass = 0;
76 int sysctl_ip_vs_expire_nodest_conn = 0;
77 int sysctl_ip_vs_expire_quiescent_template = 0;
78 int sysctl_ip_vs_sync_threshold = 3;
79 int sysctl_ip_vs_nat_icmp_send = 0;
81 #ifdef CONFIG_IP_VS_DEBUG
82 static int sysctl_ip_vs_debug_level = 0;
/*
 * Accessor for the debug-level sysctl, compiled in only under
 * CONFIG_IP_VS_DEBUG.
 * NOTE(review): braces and the matching #endif are elided in this extraction.
 */
84 int ip_vs_get_debug_level(void)
86 return sysctl_ip_vs_debug_level;
/*
 * Re-evaluate the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) against current memory pressure.
 * NOTE(review): many lines (case labels, si_meminfo() call, braces) are
 * elided in this extraction; comments below describe only what is visible.
 */
91 * update_defense_level is called from timer bh and from sysctl.
93 static void update_defense_level(void)
99 /* we only count free and buffered memory (in pages) */
101 availmem = i.freeram + i.bufferram;
/* nomem != 0 means we are below the configured memory threshold */
103 nomem = (availmem < sysctl_ip_vs_amemthresh);
/* drop_entry strategy: sysctl value acts as a small state machine
   (mode 1 escalates to 2 under pressure; mode 2 falls back to 1) */
106 spin_lock(&__ip_vs_dropentry_lock);
107 switch (sysctl_ip_vs_drop_entry) {
109 atomic_set(&ip_vs_dropentry, 0);
113 atomic_set(&ip_vs_dropentry, 1);
114 sysctl_ip_vs_drop_entry = 2;
116 atomic_set(&ip_vs_dropentry, 0);
121 atomic_set(&ip_vs_dropentry, 1);
123 atomic_set(&ip_vs_dropentry, 0);
124 sysctl_ip_vs_drop_entry = 1;
128 atomic_set(&ip_vs_dropentry, 1);
131 spin_unlock(&__ip_vs_dropentry_lock);
/* drop_packet strategy: 1-in-N drop where N grows as free memory shrinks */
134 spin_lock(&__ip_vs_droppacket_lock);
135 switch (sysctl_ip_vs_drop_packet) {
/* NOTE(review): guarded by a nomem test on elided lines — presumably
   availmem < amemthresh here, so the divisor is positive; verify upstream */
141 ip_vs_drop_rate = ip_vs_drop_counter
142 = sysctl_ip_vs_amemthresh /
143 (sysctl_ip_vs_amemthresh - availmem);
144 sysctl_ip_vs_drop_packet = 2;
151 ip_vs_drop_rate = ip_vs_drop_counter
152 = sysctl_ip_vs_amemthresh /
153 (sysctl_ip_vs_amemthresh - availmem);
156 sysctl_ip_vs_drop_packet = 1;
/* always-drop mode: fixed administrator-configured rate */
160 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
163 spin_unlock(&__ip_vs_droppacket_lock);
/* secure_tcp strategy: same escalation pattern, under the state-table lock */
166 write_lock(&__ip_vs_securetcp_lock);
167 switch (sysctl_ip_vs_secure_tcp) {
169 ip_vs_secure_tcp_set(0);
173 ip_vs_secure_tcp_set(1);
174 sysctl_ip_vs_secure_tcp = 2;
176 ip_vs_secure_tcp_set(0);
181 ip_vs_secure_tcp_set(1);
183 ip_vs_secure_tcp_set(0);
184 sysctl_ip_vs_secure_tcp = 1;
188 ip_vs_secure_tcp_set(1);
191 write_unlock(&__ip_vs_securetcp_lock);
196 * Timer for checking the defense
198 static struct timer_list defense_timer;
/* period between defense re-evaluations: one second */
199 #define DEFENSE_TIMER_PERIOD 1*HZ
/*
 * Periodic timer callback: refresh the defense level, randomly drop
 * connection entries if drop_entry is active, then re-arm the timer.
 */
201 static void defense_timer_handler(unsigned long data)
203 update_defense_level();
204 if (atomic_read(&ip_vs_dropentry))
205 ip_vs_random_dropentry();
207 mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
212 * Hash table: for virtual service lookups
/* 256 buckets for both virtual-service tables */
214 #define IP_VS_SVC_TAB_BITS 8
215 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
216 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
218 /* the service table hashed by <protocol, addr, port> */
219 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
220 /* the service table hashed by fwmark */
221 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
224 * Hash table: for real service lookups
/* 16 buckets for the real-server table */
226 #define IP_VS_RTAB_BITS 4
227 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
228 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
230 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
233 * Trash for destinations
/* removed dests still referenced by connections park here (see trash_get) */
235 static LIST_HEAD(ip_vs_dest_trash);
238 * FTP & NULL virtual service counters
/* fast checks in ip_vs_service_get(): skip fallback lookups when zero */
240 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
241 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
245 * Returns hash value for virtual service
/*
 * XOR-fold protocol, host-order address and port into a bucket index.
 * NOTE(review): function braces are elided in this extraction.
 */
247 static __inline__ unsigned
248 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
250 register unsigned porth = ntohs(port);
252 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
253 & IP_VS_SVC_TAB_MASK;
257 * Returns hash value of fwmark for virtual service lookup
/* fwmark table is indexed directly by the low bits of the mark */
259 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
261 return fwmark & IP_VS_SVC_TAB_MASK;
265 * Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
266 * or in the ip_vs_svc_fwm_table by fwmark.
267 * Should be called with locked tables.
268 * Returns bool success.
/* NOTE(review): early-return / closing-brace lines are elided here */
270 static int ip_vs_svc_hash(struct ip_vs_service *svc)
/* refuse to double-hash: the HASHED flag tracks table membership */
274 if (svc->flags & IP_VS_SVC_F_HASHED) {
275 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
276 "called from %p\n", __builtin_return_address(0));
/* fwmark==0 selects the <proto,addr,port> table; nonzero the fwmark table */
280 if (svc->fwmark == 0) {
282 * Hash it by <protocol,addr,port> in ip_vs_svc_table
284 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
285 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
288 * Hash it by fwmark in ip_vs_svc_fwm_table
290 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
291 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
294 svc->flags |= IP_VS_SVC_F_HASHED;
295 /* increase its refcnt because it is referenced by the svc table */
296 atomic_inc(&svc->refcnt);
302 * Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
303 * Should be called with locked tables.
304 * Returns bool success.
/* inverse of ip_vs_svc_hash(): unlink, clear flag, drop the table ref */
306 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
308 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
309 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
310 "called from %p\n", __builtin_return_address(0));
314 if (svc->fwmark == 0) {
316 * Remove it from the ip_vs_svc_table table.
318 list_del(&svc->s_list);
321 * Remove it from the ip_vs_svc_fwm_table table.
323 list_del(&svc->f_list);
326 svc->flags &= ~IP_VS_SVC_F_HASHED;
327 atomic_dec(&svc->refcnt);
333 * Get service by {proto,addr,port} in the service table.
/*
 * Caller must hold __ip_vs_svc_lock (read). On a hit the service usecnt
 * is bumped before returning.
 * NOTE(review): the return statements are elided in this extraction.
 */
335 static __inline__ struct ip_vs_service *
336 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
339 struct ip_vs_service *svc;
340 struct list_head *l,*e;
343 * Check for "full" addressed entries
345 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
/* open-coded list walk (pre-list_for_each style) over one bucket */
347 l = &ip_vs_svc_table[hash];
348 for (e=l->next; e!=l; e=e->next) {
349 svc = list_entry(e, struct ip_vs_service, s_list);
350 if ((svc->addr == vaddr)
351 && (svc->port == vport)
352 && (svc->protocol == protocol)) {
354 atomic_inc(&svc->usecnt);
364 * Get service by {fwmark} in the service table.
/* same contract as above, keyed by firewall mark instead */
366 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
369 struct ip_vs_service *svc;
370 struct list_head *l,*e;
373 * Check for "full" addressed entries
375 hash = ip_vs_svc_fwm_hashkey(fwmark);
377 l = &ip_vs_svc_fwm_table[hash];
378 for (e=l->next; e!=l; e=e->next) {
379 svc = list_entry(e, struct ip_vs_service, f_list);
380 if (svc->fwmark == fwmark) {
382 atomic_inc(&svc->usecnt);
/*
 * Public service lookup: try fwmark first, then <proto,addr,port>, then the
 * FTP-control and catch-all (port 0) fallbacks. Takes the svc read lock
 * itself; returns with the found service's usecnt raised (by the helpers).
 */
390 struct ip_vs_service *
391 ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
393 struct ip_vs_service *svc;
395 read_lock(&__ip_vs_svc_lock);
398 * Check the table hashed by fwmark first
400 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
404 * Check the table hashed by <protocol,addr,port>
405 * for "full" addressed entries
407 svc = __ip_vs_service_get(protocol, vaddr, vport);
/* only bother with the FTP fallback when an FTP service exists at all */
410 && protocol == IPPROTO_TCP
411 && atomic_read(&ip_vs_ftpsvc_counter)
412 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
414 * Check if ftp service entry exists, the packet
415 * might belong to FTP data connections.
417 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT)
421 && atomic_read(&ip_vs_nullsvc_counter)) {
423 * Check if the catch-all port (port zero) exists
425 svc = __ip_vs_service_get(protocol, vaddr, 0);
429 read_unlock(&__ip_vs_svc_lock);
431 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
432 fwmark, ip_vs_proto_name(protocol),
433 NIPQUAD(vaddr), ntohs(vport),
434 svc?"hit":"not hit");
/*
 * Bind a destination to its owning service: take a service ref.
 * NOTE(review): the dest->svc assignment line is elided in this extraction.
 */
441 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
443 atomic_inc(&svc->refcnt);
/* drop the dest's service ref; free the service on last reference */
448 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
450 struct ip_vs_service *svc = dest->svc;
453 if (atomic_dec_and_test(&svc->refcnt))
458 * Returns hash value for real service
/* XOR-fold host-order addr and port into an ip_vs_rtable bucket index */
460 static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
462 register unsigned porth = ntohs(port);
464 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
469 * Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
470 * should be called with locked tables.
471 * returns bool success.
/* membership is tracked by d_list being non-empty, not by a flag */
473 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
477 if (!list_empty(&dest->d_list)) {
482 * Hash by proto,addr,port,
483 * which are the parameters of the real service.
485 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
486 list_add(&dest->d_list, &ip_vs_rtable[hash]);
492 * UNhashes ip_vs_dest from ip_vs_rtable.
493 * should be called with locked tables.
494 * returns bool success.
496 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
499 * Remove it from the ip_vs_rtable table.
501 if (!list_empty(&dest->d_list)) {
502 list_del(&dest->d_list);
/* re-init so list_empty() reports "not hashed" afterwards */
503 INIT_LIST_HEAD(&dest->d_list);
510 * Lookup real service by {proto,addr,port} in the real service table.
/*
 * Takes __ip_vs_rs_lock (read) around the bucket walk; returns the first
 * matching dest. NOTE(review): the hit-path return is on elided lines.
 */
513 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
516 struct ip_vs_dest *dest;
517 struct list_head *l,*e;
520 * Check for "full" addressed entries
521 * Return the first found entry
523 hash = ip_vs_rs_hashkey(daddr, dport);
525 l = &ip_vs_rtable[hash];
527 read_lock(&__ip_vs_rs_lock);
528 for (e=l->next; e!=l; e=e->next) {
529 dest = list_entry(e, struct ip_vs_dest, d_list);
530 if ((dest->addr == daddr)
531 && (dest->port == dport)
/* protocol match condition continues on an elided line */
532 && ((dest->protocol == protocol) ||
535 read_unlock(&__ip_vs_rs_lock);
539 read_unlock(&__ip_vs_rs_lock);
545 * Lookup destination by {addr,port} in the given service
/*
 * Linear scan of svc->destinations; caller is expected to hold the svc
 * lock. NOTE(review): the return statements are elided in this extraction.
 */
547 static struct ip_vs_dest *
548 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
550 struct ip_vs_dest *dest;
551 struct list_head *l, *e;
554 * Find the destination for the given service
556 l = &svc->destinations;
557 for (e=l->next; e!=l; e=e->next) {
558 dest = list_entry(e, struct ip_vs_dest, n_list);
559 if ((dest->addr == daddr) && (dest->port == dport)) {
570 * Lookup dest by {svc,addr,port} in the destination trash.
571 * The destination trash is used to hold the destinations that are removed
572 * from the service table but are still referenced by some conn entries.
573 * The reason to add the destination trash is when the dest is temporary
574 * down (either by administrator or by monitor program), the dest can be
575 * picked back from the trash, the remaining connections to the dest can
576 * continue, and the counting information of the dest is also useful for
579 static struct ip_vs_dest *
580 ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
582 struct ip_vs_dest *dest;
583 struct list_head *l, *e;
586 * Find the destination in trash
588 l = &ip_vs_dest_trash;
590 for (e=l->next; e!=l; e=e->next) {
591 dest = list_entry(e, struct ip_vs_dest, n_list);
592 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
595 NIPQUAD(dest->addr), ntohs(dest->port),
596 atomic_read(&dest->refcnt));
/* a trash entry matches only if it belonged to this same virtual service */
597 if (dest->addr == daddr &&
598 dest->port == dport &&
599 dest->vfwmark == svc->fwmark &&
600 dest->protocol == svc->protocol &&
602 (dest->vaddr == svc->addr &&
603 dest->vport == svc->port))) {
609 * Try to purge the destination from trash if not referenced
/* refcnt==1 means only the trash holds it: safe to free opportunistically */
611 if (atomic_read(&dest->refcnt) == 1) {
612 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
615 NIPQUAD(dest->addr), ntohs(dest->port));
617 list_del(&dest->n_list);
618 __ip_vs_dst_reset(dest);
619 __ip_vs_unbind_svc(dest);
629 * Clean up all the destinations in the trash
630 * Called by the ip_vs_control_cleanup()
632 * When the ip_vs_control_clearup is activated by ipvs module exit,
633 * the service tables must have been flushed and all the connections
634 * are expired, and the refcnt of each destination in the trash must
635 * be 1, so we simply release them here.
637 static void ip_vs_trash_cleanup(void)
639 struct ip_vs_dest *dest;
642 l = &ip_vs_dest_trash;
/* pop-until-empty: each iteration unlinks and releases the head entry */
644 while (l->next != l) {
645 dest = list_entry(l->next, struct ip_vs_dest, n_list);
646 list_del(&dest->n_list);
647 __ip_vs_dst_reset(dest);
648 __ip_vs_unbind_svc(dest);
/*
 * Zero an ip_vs_stats block under its own lock. The memset deliberately
 * clears only the bytes that precede the embedded lock member, so the
 * spinlock itself is never overwritten while held.
 */
655 __ip_vs_zero_stats(struct ip_vs_stats *stats)
657 spin_lock_bh(&stats->lock);
658 memset(stats, 0, (char *)&stats->lock - (char *)stats);
659 spin_unlock_bh(&stats->lock);
660 ip_vs_zero_estimator(stats);
664 * Update a destination in the given service
/*
 * Apply user-supplied settings (weight, forwarding flags) to a dest and
 * (re)bind it to svc. Shared by add/edit paths.
 */
666 static void __ip_vs_update_dest(struct ip_vs_service *svc,
667 struct ip_vs_dest *dest,
668 struct ip_vs_rule_user *ur)
673 * Set the weight and the flags
675 atomic_set(&dest->weight, ur->weight);
/* new conns to this dest always start in the inactive state */
677 conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
680 * Check if local node and update the flags
682 if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
/* a local real server forces the LOCALNODE forwarding method */
683 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
684 | IP_VS_CONN_F_LOCALNODE;
688 * Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
690 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
691 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
694 * Put the real service in ip_vs_rtable if not present.
695 * For now only for NAT!
/* _bh variant: the rs table is also read from softirq context */
697 write_lock_bh(&__ip_vs_rs_lock);
699 write_unlock_bh(&__ip_vs_rs_lock);
701 atomic_set(&dest->conn_flags, conn_flags);
703 /* bind the service */
705 __ip_vs_bind_svc(dest, svc);
/* rebinding to a different service resets the dest's statistics */
707 if (dest->svc != svc) {
708 __ip_vs_unbind_svc(dest);
709 __ip_vs_zero_stats(&dest->stats);
710 __ip_vs_bind_svc(dest, svc);
714 /* set the dest status flags */
715 dest->flags |= IP_VS_DEST_F_AVAILABLE;
720 * Create a destination for the given service
/*
 * Allocate and initialize a new ip_vs_dest for svc from the user rule,
 * returning it through *destp.
 * NOTE(review): error-return lines (e.g. after the kmalloc check) are
 * elided in this extraction.
 */
723 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
724 struct ip_vs_dest **destp)
726 struct ip_vs_dest *dest;
/* real-server address must be local or unicast — reject anything else */
731 atype = inet_addr_type(ur->daddr);
732 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
735 *destp = dest = (struct ip_vs_dest*)
736 kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
738 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
741 memset(dest, 0, sizeof(struct ip_vs_dest));
/* record the owning virtual service's identity in the dest */
743 dest->protocol = svc->protocol;
744 dest->vaddr = svc->addr;
745 dest->vport = svc->port;
746 dest->vfwmark = svc->fwmark;
747 dest->addr = ur->daddr;
748 dest->port = ur->dport;
750 atomic_set(&dest->activeconns, 0);
751 atomic_set(&dest->inactconns, 0);
752 atomic_set(&dest->refcnt, 0);
754 INIT_LIST_HEAD(&dest->d_list);
755 dest->dst_lock = SPIN_LOCK_UNLOCKED;
756 dest->stats.lock = SPIN_LOCK_UNLOCKED;
/* apply weight/flags and bind to svc, then start rate estimation */
757 __ip_vs_update_dest(svc, dest, ur);
758 ip_vs_new_estimator(&dest->stats);
766 * Add a destination into an existing service
/*
 * Three outcomes: reject (exists already / bad weight), resurrect a
 * matching dest from the trash, or allocate a brand-new one. In the last
 * two cases the dest is linked into svc->destinations under the svc
 * write lock and the scheduler is notified.
 */
768 static int ip_vs_add_dest(struct ip_vs_service *svc,
769 struct ip_vs_rule_user *ur)
771 struct ip_vs_dest *dest;
772 __u32 daddr = ur->daddr;
773 __u16 dport = ur->dport;
778 if (ur->weight < 0) {
779 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
784 * Check if the dest already exists in the list
786 dest = ip_vs_lookup_dest(svc, daddr, dport);
788 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
793 * Check if the dest already exists in the trash and
794 * is from the same service
796 dest = ip_vs_trash_get_dest(svc, daddr, dport);
798 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
799 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
800 NIPQUAD(daddr), ntohs(dport),
801 atomic_read(&dest->refcnt),
803 NIPQUAD(dest->vaddr),
805 __ip_vs_update_dest(svc, dest, ur);
808 * Get the destination from the trash
810 list_del(&dest->n_list);
812 ip_vs_new_estimator(&dest->stats);
814 write_lock_bh(&__ip_vs_svc_lock);
817 * Wait until all other svc users go away.
/* busy-wait: writer holds the lock, so no new readers can take a usecnt */
819 while (atomic_read(&svc->usecnt) > 1) {};
821 list_add(&dest->n_list, &svc->destinations);
824 /* call the update_service function of its scheduler */
825 svc->scheduler->update_service(svc);
827 write_unlock_bh(&__ip_vs_svc_lock);
832 * Allocate and initialize the dest structure
834 ret = ip_vs_new_dest(svc, ur, &dest);
840 * Add the dest entry into the list
/* the service's destination list holds one reference on the dest */
842 atomic_inc(&dest->refcnt);
844 write_lock_bh(&__ip_vs_svc_lock);
847 * Wait until all other svc users go away.
849 while (atomic_read(&svc->usecnt) > 1) {};
851 list_add(&dest->n_list, &svc->destinations);
854 /* call the update_service function of its scheduler */
855 svc->scheduler->update_service(svc);
857 write_unlock_bh(&__ip_vs_svc_lock);
866 * Edit a destination in the given service
/*
 * Update an existing dest's weight/flags from the user rule, then kick
 * the scheduler (weights may have changed) under the svc write lock.
 */
868 static int ip_vs_edit_dest(struct ip_vs_service *svc,
869 struct ip_vs_rule_user *ur)
871 struct ip_vs_dest *dest;
872 __u32 daddr = ur->daddr;
873 __u16 dport = ur->dport;
877 if (ur->weight < 0) {
878 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
883 * Lookup the destination list
885 dest = ip_vs_lookup_dest(svc, daddr, dport);
887 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
891 __ip_vs_update_dest(svc, dest, ur);
893 write_lock_bh(&__ip_vs_svc_lock);
895 /* Wait until all other svc users go away */
896 while (atomic_read(&svc->usecnt) > 1) {};
898 /* call the update_service, because server weight may be changed */
899 svc->scheduler->update_service(svc);
901 write_unlock_bh(&__ip_vs_svc_lock);
910 * Delete a destination (must be already unlinked from the service)
/*
 * Final disposal of a dest: stop its estimator, remove it from the real
 * service table, then either free it (last ref) or park it in the trash
 * so lingering connections can keep using it.
 */
912 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
914 ip_vs_kill_estimator(&dest->stats);
917 * Remove it from the d-linked list with the real services.
919 write_lock_bh(&__ip_vs_rs_lock);
920 ip_vs_rs_unhash(dest);
921 write_unlock_bh(&__ip_vs_rs_lock);
924 * Decrease the refcnt of the dest, and free the dest
925 * if nobody refers to it (refcnt=0). Otherwise, throw
926 * the destination into the trash.
928 if (atomic_dec_and_test(&dest->refcnt)) {
929 __ip_vs_dst_reset(dest);
930 /* simply decrease svc->refcnt here, let the caller check
931 and release the service if nobody refers to it.
932 Only user context can release destination and service,
933 and only one user context can update virtual service at a
934 time, so the operation here is OK */
935 atomic_dec(&dest->svc->refcnt);
938 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
939 NIPQUAD(dest->addr), ntohs(dest->port),
940 atomic_read(&dest->refcnt));
941 list_add(&dest->n_list, &ip_vs_dest_trash);
/* the trash list itself holds one reference */
942 atomic_inc(&dest->refcnt);
948 * Unlink a destination from the given service
/*
 * Mark the dest unavailable and remove it from svc->destinations;
 * notifies the scheduler. Caller holds the svc write lock.
 * NOTE(review): the third parameter's declaration line is elided here.
 */
950 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
951 struct ip_vs_dest *dest,
954 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
957 * Remove it from the d-linked destination list.
959 list_del(&dest->n_list);
963 * Call the update_service function of its scheduler
965 svc->scheduler->update_service(svc);
971 * Delete a destination server in the given service
/* user-facing delete: lookup, unlink under the svc lock, then dispose */
973 static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
975 struct ip_vs_dest *dest;
976 __u32 daddr = ur->daddr;
977 __u16 dport = ur->dport;
981 dest = ip_vs_lookup_dest(svc, daddr, dport);
983 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
987 write_lock_bh(&__ip_vs_svc_lock);
990 * Wait until all other svc users go away.
992 while (atomic_read(&svc->usecnt) > 1) {};
995 * Unlink dest from the service
997 __ip_vs_unlink_dest(svc, dest, 1);
999 write_unlock_bh(&__ip_vs_svc_lock);
1002 * Delete the destination
1004 __ip_vs_del_dest(dest);
1013 * Add a service into the service hash table
/*
 * Create a new virtual service from the user rule: resolve the scheduler,
 * allocate and fill the svc, bind the scheduler, bump the FTP/null
 * counters, and hash it into the tables. The new service is returned via
 * *svc_p with usecnt==1 (creator is the first user).
 */
1016 ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
1019 struct ip_vs_scheduler *sched;
1020 struct ip_vs_service *svc = NULL;
1025 * Lookup the scheduler, by 'ur->sched_name'
1027 sched = ip_vs_scheduler_get(ur->sched_name);
1028 if (sched == NULL) {
1029 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1035 svc = (struct ip_vs_service*)
1036 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1038 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1042 memset(svc, 0, sizeof(struct ip_vs_service));
1044 svc->protocol = ur->protocol;
1045 svc->addr = ur->vaddr;
1046 svc->port = ur->vport;
1047 svc->fwmark = ur->vfwmark;
1048 svc->flags = ur->vs_flags;
/* user supplies seconds; kernel keeps jiffies */
1049 svc->timeout = ur->timeout * HZ;
1050 svc->netmask = ur->netmask;
1052 INIT_LIST_HEAD(&svc->destinations);
1053 svc->sched_lock = RW_LOCK_UNLOCKED;
1054 svc->stats.lock = SPIN_LOCK_UNLOCKED;
1057 * Bind the scheduler
1059 ret = ip_vs_bind_scheduler(svc, sched);
1065 * Update the virtual service counters
1067 if (svc->port == FTPPORT)
1068 atomic_inc(&ip_vs_ftpsvc_counter);
1069 else if (svc->port == 0)
1070 atomic_inc(&ip_vs_nullsvc_counter);
1073 * I'm the first user of the service
1075 atomic_set(&svc->usecnt, 1);
1076 atomic_set(&svc->refcnt, 0);
1078 ip_vs_new_estimator(&svc->stats);
1079 ip_vs_num_services++;
1082 * Hash the service into the service table
1084 write_lock_bh(&__ip_vs_svc_lock);
1085 ip_vs_svc_hash(svc);
1086 write_unlock_bh(&__ip_vs_svc_lock);
/* error path: release the scheduler module reference */
1094 ip_vs_scheduler_put(sched);
1102 * Edit a service and bind it with a new scheduler
/*
 * Update flags/timeout/netmask of an existing service and, if the rule
 * names a different scheduler, swap schedulers under the svc write lock.
 */
1104 static int ip_vs_edit_service(struct ip_vs_service *svc,
1105 struct ip_vs_rule_user *ur)
1107 struct ip_vs_scheduler *sched, *old_sched;
1111 * Lookup the scheduler, by 'ur->sched_name'
1113 sched = ip_vs_scheduler_get(ur->sched_name);
1114 if (sched == NULL) {
1115 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1120 write_lock_bh(&__ip_vs_svc_lock);
1123 * Wait until all other svc users go away.
1125 while (atomic_read(&svc->usecnt) > 1) {};
1128 * Set the flags and timeout value
/* preserve HASHED: the service stays in the tables across an edit */
1130 svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
1131 svc->timeout = ur->timeout * HZ;
1132 svc->netmask = ur->netmask;
1134 old_sched = svc->scheduler;
1135 if (sched != old_sched) {
1137 * Unbind the old scheduler
1139 if ((ret = ip_vs_unbind_scheduler(svc))) {
1145 * Bind the new scheduler
1147 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1149 * If ip_vs_bind_scheduler fails, restore the old
1151 * The main reason of failure is out of memory.
1153 * The question is if the old scheduler can be
1154 * restored all the time. TODO: if it cannot be
1155 * restored some time, we must delete the service,
1156 * otherwise the system may crash.
1158 ip_vs_bind_scheduler(svc, old_sched);
1164 write_unlock_bh(&__ip_vs_svc_lock);
/* drop the module reference taken by ip_vs_scheduler_get on the old one */
1167 ip_vs_scheduler_put(old_sched);
1174 * Delete a service from the service list
1175 * The service must be unlinked, unlocked and not referenced!
/*
 * Tear down a service: stop the estimator, unbind the scheduler, dispose
 * of every destination, fix the FTP/null counters, and free the svc when
 * its refcnt has dropped to zero.
 */
1177 static void __ip_vs_del_service(struct ip_vs_service *svc)
1179 struct list_head *l;
1180 struct ip_vs_dest *dest;
1181 struct ip_vs_scheduler *old_sched;
1183 ip_vs_num_services--;
1184 ip_vs_kill_estimator(&svc->stats);
1189 old_sched = svc->scheduler;
1190 ip_vs_unbind_scheduler(svc);
1191 if (old_sched && old_sched->module)
1192 __MOD_DEC_USE_COUNT(old_sched->module);
1195 * Unlink the whole destination list
1197 l = &svc->destinations;
/* pop-until-empty over svc->destinations */
1198 while (l->next != l) {
1199 dest = list_entry(l->next, struct ip_vs_dest, n_list);
1200 __ip_vs_unlink_dest(svc, dest, 0);
1201 __ip_vs_del_dest(dest);
1205 * Update the virtual service counters
1207 if (svc->port == FTPPORT)
1208 atomic_dec(&ip_vs_ftpsvc_counter);
1209 else if (svc->port == 0)
1210 atomic_dec(&ip_vs_nullsvc_counter);
1213 * Free the service if nobody refers to it
1215 if (atomic_read(&svc->refcnt) == 0)
1221 * Delete a service from the service list
/* user-facing delete: unhash, wait out readers, then tear down */
1223 static int ip_vs_del_service(struct ip_vs_service *svc)
1229 * Unhash it from the service table
1231 write_lock_bh(&__ip_vs_svc_lock);
1233 ip_vs_svc_unhash(svc);
1236 * Wait until all the svc users go away.
1238 while (atomic_read(&svc->usecnt) > 1) {};
1240 __ip_vs_del_service(svc);
1242 write_unlock_bh(&__ip_vs_svc_lock);
1249 * Flush all the virtual services
/*
 * Delete every service from both hash tables. Note the flush path waits
 * for usecnt > 0 (not > 1): the caller here is not counted as a user of
 * each individual service, unlike ip_vs_del_service().
 */
1251 static int ip_vs_flush(void)
1254 struct ip_vs_service *svc;
1255 struct list_head *l;
1258 * Flush the service table hashed by <protocol,addr,port>
1260 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261 l = &ip_vs_svc_table[idx];
1262 while (l->next != l) {
1263 svc = list_entry(l->next,struct ip_vs_service,s_list);
1264 write_lock_bh(&__ip_vs_svc_lock);
1265 ip_vs_svc_unhash(svc);
1267 * Wait until all the svc users go away.
1269 while (atomic_read(&svc->usecnt) > 0) {};
1270 __ip_vs_del_service(svc);
1271 write_unlock_bh(&__ip_vs_svc_lock);
1276 * Flush the service table hashed by fwmark
1278 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279 l = &ip_vs_svc_fwm_table[idx];
1280 while (l->next != l) {
1281 svc = list_entry(l->next,struct ip_vs_service,f_list);
1282 write_lock_bh(&__ip_vs_svc_lock);
1283 ip_vs_svc_unhash(svc);
1285 * Wait until all the svc users go away.
1287 while (atomic_read(&svc->usecnt) > 0) {};
1288 __ip_vs_del_service(svc);
1289 write_unlock_bh(&__ip_vs_svc_lock);
1298 * Zero counters in a service or all services
/* clear the stats of one service and of each of its destinations */
1300 static int ip_vs_zero_service(struct ip_vs_service *svc)
1302 struct list_head *l;
1303 struct ip_vs_dest *dest;
1305 write_lock_bh(&__ip_vs_svc_lock);
1306 list_for_each (l, &svc->destinations) {
1307 dest = list_entry(l, struct ip_vs_dest, n_list);
1308 __ip_vs_zero_stats(&dest->stats);
1310 __ip_vs_zero_stats(&svc->stats);
1311 write_unlock_bh(&__ip_vs_svc_lock);
/* zero every service in both tables, then the global ip_vs_stats */
1315 static int ip_vs_zero_all(void)
1318 struct list_head *l;
1319 struct ip_vs_service *svc;
1321 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322 list_for_each (l, &ip_vs_svc_table[idx]) {
1323 svc = list_entry(l, struct ip_vs_service, s_list);
1324 ip_vs_zero_service(svc);
1328 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330 svc = list_entry(l, struct ip_vs_service, f_list);
1331 ip_vs_zero_service(svc);
1335 __ip_vs_zero_stats(&ip_vs_stats);
/*
 * sysctl handler for the three defense-mode knobs: delegates to
 * proc_dointvec, validates the written value is in [0,3] (restoring the
 * previous value otherwise), and re-runs update_defense_level() on change.
 * NOTE(review): the val capture and restore lines are elided here.
 */
1340 static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
1341 struct file * filp, void *buffer, size_t *lenp)
1343 int *valp = ctl->data;
1347 ret = proc_dointvec(ctl, write, filp, buffer, lenp);
1348 if (write && (*valp != val)) {
1349 if ((*valp < 0) || (*valp > 3)) {
1350 /* Restore the correct value */
1354 update_defense_level();
/*
 * sysctl tree for net.ipv4.vs.*: the leaf table (vs_vars) plus the three
 * directory levels (vs -> ipv4 -> net) that anchor it under CTL_NET.
 * Entries use the old positional ctl_table initializer:
 * {id, name, data, maxlen, mode, child, proc_handler}.
 */
1365 struct ip_vs_sysctl_table {
1366 struct ctl_table_header *sysctl_header;
1367 ctl_table vs_vars[NET_IPV4_VS_LAST];
1368 ctl_table vs_dir[2];
1369 ctl_table ipv4_dir[2];
1370 ctl_table root_dir[2];
1374 static struct ip_vs_sysctl_table ipv4_vs_table = {
1376 {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
1377 &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
1379 #ifdef CONFIG_IP_VS_DEBUG
1380 {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
1381 &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
1384 {NET_IPV4_VS_AMDROPRATE, "am_droprate",
1385 &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
/* the three defense knobs share the validating handler above */
1387 {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
1388 &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
1389 &ip_vs_sysctl_defense_mode},
1390 {NET_IPV4_VS_DROP_PACKET, "drop_packet",
1391 &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
1392 &ip_vs_sysctl_defense_mode},
1393 {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
1394 &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
1395 &ip_vs_sysctl_defense_mode},
/* secure_tcp per-state timeouts, stored in jiffies via the DoS table */
1396 {NET_IPV4_VS_TO_ES, "timeout_established",
1397 &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1398 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1399 {NET_IPV4_VS_TO_SS, "timeout_synsent",
1400 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1401 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1402 {NET_IPV4_VS_TO_SR, "timeout_synrecv",
1403 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1404 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1405 {NET_IPV4_VS_TO_FW, "timeout_finwait",
1406 &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1407 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1408 {NET_IPV4_VS_TO_TW, "timeout_timewait",
1409 &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1410 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1411 {NET_IPV4_VS_TO_CL, "timeout_close",
1412 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1413 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1414 {NET_IPV4_VS_TO_CW, "timeout_closewait",
1415 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1416 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1417 {NET_IPV4_VS_TO_LA, "timeout_lastack",
1418 &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1419 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1420 {NET_IPV4_VS_TO_LI, "timeout_listen",
1421 &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1422 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1423 {NET_IPV4_VS_TO_SA, "timeout_synack",
1424 &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1425 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1426 {NET_IPV4_VS_TO_UDP, "timeout_udp",
1427 &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1428 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1429 {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
1430 &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1431 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1432 {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
1433 &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
1435 {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
1436 &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
1438 {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
1439 &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
1441 {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
1442 &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
1444 {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
1445 &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
/* directory chain: net/ipv4/vs */
1448 {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
1450 {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
1452 {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
1458 * Write the contents of the VS rule table to a PROCfs file.
1459 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding-method flag bits to its display name for
 * /proc output. NOTE(review): the returned string literals and the
 * default (masq) case are on elided lines.
 */
1461 static inline char *ip_vs_fwd_name(unsigned flags)
1465 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466 case IP_VS_CONN_F_LOCALNODE:
1469 case IP_VS_CONN_F_TUNNEL:
1472 case IP_VS_CONN_F_DROUTE:
/*
 * /proc read handler: render both service tables (and each service's
 * destinations) into buf using the legacy get_info interface
 * (offset/length windowing with *start).
 */
1481 static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
1485 char temp[64], temp2[32];
1487 struct ip_vs_service *svc;
1488 struct ip_vs_dest *dest;
1489 struct list_head *l, *e, *p, *q;
1492 * Note: since the length of the buffer is usually the multiple
1493 * of 512, it is good to use fixed record of the divisor of 512,
1494 * so that records won't be truncated at buffer boundary.
1499 "IP Virtual Server version %d.%d.%d (size=%d)",
1500 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
/* every record is padded to 63 chars + newline = 64 bytes (512/8) */
1501 len += sprintf(buf+len, "%-63s\n", temp);
1502 len += sprintf(buf+len, "%-63s\n",
1503 "Prot LocalAddress:Port Scheduler Flags");
1504 len += sprintf(buf+len, "%-63s\n",
1505 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
1508 read_lock_bh(&__ip_vs_svc_lock);
1510 /* print the service table hashed by <protocol,addr,port> */
1511 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1512 l = &ip_vs_svc_table[idx];
1513 for (e=l->next; e!=l; e=e->next) {
1514 svc = list_entry(e, struct ip_vs_service, s_list);
1517 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1518 sprintf(temp2, "persistent %d %08X",
1520 ntohl(svc->netmask));
1524 sprintf(temp, "%s %08X:%04X %s %s",
1525 ip_vs_proto_name(svc->protocol),
1528 svc->scheduler->name, temp2);
1529 len += sprintf(buf+len, "%-63s\n", temp);
/* inner walk: one padded line per destination of this service */
1534 p = &svc->destinations;
1535 for (q=p->next; q!=p; q=q->next) {
1536 dest = list_entry(q, struct ip_vs_dest, n_list);
1541 " -> %08X:%04X %-7s %-6d %-10d %-10d",
1544 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1545 atomic_read(&dest->weight),
1546 atomic_read(&dest->activeconns),
1547 atomic_read(&dest->inactconns));
1548 len += sprintf(buf+len, "%-63s\n", temp);
1555 /* print the service table hashed by fwmark */
1556 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1557 l = &ip_vs_svc_fwm_table[idx];
1558 for (e=l->next; e!=l; e=e->next) {
1559 svc = list_entry(e, struct ip_vs_service, f_list);
1562 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1563 sprintf(temp2, "persistent %d %08X",
1565 ntohl(svc->netmask));
1569 sprintf(temp, "FWM %08X %s %s",
1571 svc->scheduler->name, temp2);
1572 len += sprintf(buf+len, "%-63s\n", temp);
1577 p = &svc->destinations;
1578 for (q=p->next; q!=p; q=q->next) {
1579 dest = list_entry(q, struct ip_vs_dest, n_list);
1584 " -> %08X:%04X %-7s %-6d %-10d %-10d",
1587 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1588 atomic_read(&dest->weight),
1589 atomic_read(&dest->activeconns),
1590 atomic_read(&dest->inactconns));
1591 len += sprintf(buf+len, "%-63s\n", temp);
1599 read_unlock_bh(&__ip_vs_svc_lock);
/* legacy get_info windowing: point *start at the requested offset */
1601 *start = buf+len-(pos-offset); /* Start of wanted data */
/* Global IPVS traffic statistics, zeroed and lock-initialized in
 * ip_vs_control_init(); protected by ip_vs_stats.lock. */
1611 struct ip_vs_stats ip_vs_stats;
/*
 * ip_vs_stats_get_info - /proc/net/ip_vs_stats read handler.
 * Prints global totals (conns, packets, 64-bit byte counters split into
 * two 32-bit hex halves) followed by per-second rate estimates, all
 * under ip_vs_stats.lock.
 * NOTE(review): return type (on an elided line) and the final
 * length/return bookkeeping are not visible here.
 */
1614 ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1622 len += sprintf(buf+len, "%-63s\n%-63s\n",
1623 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1624 " Total Incoming Outgoing Incoming Outgoing",
1625 " Conns Packets Packets Bytes Bytes");
1627 spin_lock_bh(&ip_vs_stats.lock);
/* 64-bit byte counters are printed as high/low 32-bit hex halves */
1628 sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1631 ip_vs_stats.outpkts,
1632 (__u32)(ip_vs_stats.inbytes>>32),
1633 (__u32)ip_vs_stats.inbytes,
1634 (__u32)(ip_vs_stats.outbytes>>32),
1635 (__u32)ip_vs_stats.outbytes);
1636 len += sprintf(buf+len, "%-62s\n\n", temp);
1638 len += sprintf(buf+len, "%-63s\n",
1639 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1640 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s");
/* rate estimates maintained by the ip_vs estimator */
1641 sprintf(temp, "%8X %8X %8X %16X %16X",
1646 ip_vs_stats.outbps);
1647 len += sprintf(buf+len, "%-63s\n", temp);
1649 spin_unlock_bh(&ip_vs_stats.lock);
1652 *start = buf+len-(pos-offset); /* Start of wanted data */
1663 * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
/*
 * ip_vs_set_timeouts - apply user-supplied TCP/TCP-FIN/UDP timeouts.
 * Values from @u are given in seconds and stored in vs_timeout_table
 * in jiffies; a zero value leaves the corresponding timeout unchanged.
 * NOTE(review): the return statement is on an elided line (presumably
 * returns 0).
 */
1665 static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1667 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1672 if (u->tcp_timeout) {
1673 vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674 = u->tcp_timeout * HZ;
1677 if (u->tcp_fin_timeout) {
1678 vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679 = u->tcp_fin_timeout * HZ;
1682 if (u->udp_timeout) {
1683 vs_timeout_table.timeout[IP_VS_S_UDP]
1684 = u->udp_timeout * HZ;
/*
 * do_ip_vs_set_ctl - setsockopt() handler for all IPVS control commands
 * (registered via ip_vs_sockopts). Requires CAP_NET_ADMIN, bounds-checks
 * @len, copies the ip_vs_rule_user from userspace, then dispatches on
 * @cmd under __ip_vs_mutex (setsockopt may sleep, per the file header).
 * NOTE(review): return type, several error-path returns/gotos, and the
 * kfree/up() epilogue are on lines elided from this view.
 */
1691 do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1694 struct ip_vs_rule_user *urule;
1695 struct ip_vs_service *svc = NULL;
/* privileged operation only */
1697 if (!capable(CAP_NET_ADMIN))
1701 * Check the size of mm, no overflow...
1702 * len > 128000 is a sanity check.
1704 if (len < sizeof(struct ip_vs_rule_user)) {
1705 IP_VS_ERR("set_ctl: len %u < %Zu\n",
1706 len, sizeof(struct ip_vs_rule_user));
1708 } else if (len > 128000) {
1709 IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1711 } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1712 IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1714 } else if (copy_from_user(urule, user, len) != 0) {
/* serialize all sockopt operations; interruptible so a signal aborts */
1720 if (down_interruptible(&__ip_vs_mutex)) {
/* commands that need no service lookup are handled first */
1725 if (cmd == IP_VS_SO_SET_FLUSH) {
1726 /* Flush the virtual service */
1727 ret = ip_vs_flush();
1729 } else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1730 /* Set timeout values for (tcp tcpfin udp) */
1731 ret = ip_vs_set_timeouts(urule);
1733 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1734 ret = start_sync_thread(urule->state, urule->mcast_ifn,
1737 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1738 ret = stop_sync_thread(urule->state);
1740 } else if (cmd == IP_VS_SO_SET_ZERO) {
1741 /* if no service address is set, zero counters in all */
1742 if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1743 ret = ip_vs_zero_all();
1749 * Check for valid protocol: TCP or UDP. Even for fwmark!=0
1751 if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1752 IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1753 urule->protocol, NIPQUAD(urule->vaddr),
1754 ntohs(urule->vport), urule->sched_name);
1760 * Lookup the exact service by <protocol, vaddr, vport> or fwmark
1762 if (urule->vfwmark == 0)
1763 svc = __ip_vs_service_get(urule->protocol,
1764 urule->vaddr, urule->vport);
1766 svc = __ip_vs_svc_fwm_get(urule->vfwmark);
/* all remaining commands except ADD need an existing, matching service */
1768 if (cmd != IP_VS_SO_SET_ADD
1769 && (svc == NULL || svc->protocol != urule->protocol)) {
1775 case IP_VS_SO_SET_ADD:
1779 ret = ip_vs_add_service(urule, &svc);
1781 case IP_VS_SO_SET_EDIT:
1782 ret = ip_vs_edit_service(svc, urule);
1784 case IP_VS_SO_SET_DEL:
1785 ret = ip_vs_del_service(svc);
1789 case IP_VS_SO_SET_ADDDEST:
1790 ret = ip_vs_add_dest(svc, urule);
1792 case IP_VS_SO_SET_EDITDEST:
1793 ret = ip_vs_edit_dest(svc, urule);
1795 case IP_VS_SO_SET_DELDEST:
1796 ret = ip_vs_del_dest(svc, urule);
1798 case IP_VS_SO_SET_ZERO:
1799 ret = ip_vs_zero_service(svc);
/* drop the reference taken by the service lookup above */
1806 ip_vs_service_put(svc);
/*
 * __ip_vs_copy_stats - snapshot the counter fields of @src into the
 * userspace-layout @dst under @src->lock.
 * The memcpy length is computed as the byte offset of the lock member,
 * i.e. it copies every field laid out before the lock — this relies on
 * struct ip_vs_stats placing all exported counters ahead of 'lock' and
 * on ip_vs_stats_user matching that prefix layout.
 */
1819 __ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1821 spin_lock_bh(&src->lock);
1822 memcpy(dst, src, (char*)&src->lock - (char*)src);
1823 spin_unlock_bh(&src->lock);
/*
 * __ip_vs_get_service_entries - copy up to @get->num_services service
 * entries to userspace at @uptr->entrytable, walking first the
 * <protocol,addr,port> hash table and then the fwmark hash table.
 * Each entry is filled on the stack and pushed out with copy_to_user.
 * NOTE(review): return type, the count increment, the EFAULT error path
 * and the final return are on elided lines; caller presumably holds the
 * service lock and __ip_vs_mutex (taken in do_ip_vs_get_ctl).
 */
1827 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828 struct ip_vs_get_services *uptr)
1831 struct ip_vs_service *svc;
1832 struct list_head *l;
1833 struct ip_vs_service_user entry;
/* pass 1: services hashed by <protocol,addr,port> */
1836 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837 list_for_each (l, &ip_vs_svc_table[idx]) {
/* stop once the user-supplied buffer is full */
1838 if (count >= get->num_services)
1840 svc = list_entry(l, struct ip_vs_service, s_list);
1841 entry.protocol = svc->protocol;
1842 entry.addr = svc->addr;
1843 entry.port = svc->port;
1844 entry.fwmark = svc->fwmark;
/* strncpy may leave no NUL; terminate explicitly */
1845 strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1846 entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1847 entry.flags = svc->flags;
1848 entry.timeout = svc->timeout / HZ;
1849 entry.netmask = svc->netmask;
1850 entry.num_dests = svc->num_dests;
1851 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1852 if (copy_to_user(&uptr->entrytable[count],
1853 &entry, sizeof(entry))) {
/* pass 2: services hashed by fwmark */
1861 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1862 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1863 if (count >= get->num_services)
1865 svc = list_entry(l, struct ip_vs_service, f_list);
1866 entry.protocol = svc->protocol;
1867 entry.addr = svc->addr;
1868 entry.port = svc->port;
1869 entry.fwmark = svc->fwmark;
1870 strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1871 entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1872 entry.flags = svc->flags;
1873 entry.timeout = svc->timeout / HZ;
1874 entry.netmask = svc->netmask;
1875 entry.num_dests = svc->num_dests;
1876 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1877 if (copy_to_user(&uptr->entrytable[count],
1878 &entry, sizeof(entry))) {
/*
 * __ip_vs_get_dest_entries - copy up to @get->num_dests destination
 * entries of one service to userspace at @uptr->entrytable.
 * The service is looked up by fwmark when set, otherwise by
 * <protocol,addr,port>; the lookup takes a reference released with
 * ip_vs_service_put() below.
 * NOTE(review): return type, the count increment, the not-found branch
 * and the final return are on elided lines.
 */
1890 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1891 struct ip_vs_get_dests *uptr)
1893 struct ip_vs_service *svc;
1897 svc = __ip_vs_svc_fwm_get(get->fwmark);
1899 svc = __ip_vs_service_get(get->protocol,
1900 get->addr, get->port);
1903 struct ip_vs_dest *dest;
1904 struct list_head *l, *e;
1905 struct ip_vs_dest_user entry;
/* walk the service's destination list, one userspace record each */
1907 l = &svc->destinations;
1908 for (e=l->next; e!=l; e=e->next) {
1909 if (count >= get->num_dests)
1911 dest = list_entry(e, struct ip_vs_dest, n_list);
1912 entry.addr = dest->addr;
1913 entry.port = dest->port;
1914 entry.flags = atomic_read(&dest->conn_flags);
1915 entry.weight = atomic_read(&dest->weight);
1916 entry.activeconns = atomic_read(&dest->activeconns);
1917 entry.inactconns = atomic_read(&dest->inactconns);
1918 __ip_vs_copy_stats(&entry.stats, &dest->stats);
1919 if (copy_to_user(&uptr->entrytable[count],
1920 &entry, sizeof(entry))) {
/* release the lookup reference */
1926 ip_vs_service_put(svc);
/*
 * __ip_vs_get_timeouts - report the current TCP/TCP-FIN/UDP timeouts
 * to userspace layout @u, converting from jiffies back to seconds
 * (inverse of ip_vs_set_timeouts).
 */
1933 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1935 u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1936 u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1937 u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
/*
 * do_ip_vs_get_ctl - getsockopt() handler for IPVS query commands
 * (registered via ip_vs_sockopts). Requires CAP_NET_ADMIN and holds
 * __ip_vs_mutex across the whole dispatch. Each case validates *len
 * against the expected structure size before copying data out.
 * NOTE(review): return type, several error 'ret = -E...' assignments,
 * break statements and the up()/return epilogue are on elided lines.
 */
1941 do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1945 if (!capable(CAP_NET_ADMIN))
1948 if (down_interruptible(&__ip_vs_mutex))
1949 return -ERESTARTSYS;
/* version banner as a NUL-terminated string */
1952 case IP_VS_SO_GET_VERSION:
1956 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1957 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1958 if (*len < strlen(buf)+1) {
1962 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1966 *len = strlen(buf)+1;
/* version code, conn-table size and current service count */
1970 case IP_VS_SO_GET_INFO:
1972 struct ip_vs_getinfo info;
1973 info.version = IP_VS_VERSION_CODE;
1974 info.size = IP_VS_CONN_TAB_SIZE;
1975 info.num_services = ip_vs_num_services;
1976 if (copy_to_user(user, &info, sizeof(info)) != 0)
/* full service table dump; *len must match header + n entries exactly */
1981 case IP_VS_SO_GET_SERVICES:
1983 struct ip_vs_get_services get;
1985 if (*len < sizeof(get)) {
1986 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1990 if (copy_from_user(&get, user, sizeof(get))) {
1994 if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1995 IP_VS_ERR("length: %u != %Zu\n", *len,
1996 sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
2000 ret = __ip_vs_get_service_entries(&get, user);
/* single-service lookup by fwmark or <protocol,addr,port> */
2004 case IP_VS_SO_GET_SERVICE:
2006 struct ip_vs_service_user get;
2007 struct ip_vs_service *svc;
2009 if (*len != sizeof(get)) {
2010 IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2014 if (copy_from_user(&get, user, sizeof(get))) {
2020 svc = __ip_vs_svc_fwm_get(get.fwmark);
2022 svc = __ip_vs_service_get(get.protocol,
2023 get.addr, get.port);
/* strncpy may leave no NUL; terminate explicitly */
2025 strncpy(get.sched_name, svc->scheduler->name, sizeof(get.sched_name));
2026 get.sched_name[sizeof(get.sched_name) - 1] = 0;
2027 get.flags = svc->flags;
2028 get.timeout = svc->timeout / HZ;
2029 get.netmask = svc->netmask;
2030 get.num_dests = svc->num_dests;
2031 __ip_vs_copy_stats(&get.stats, &svc->stats);
2032 if (copy_to_user(user, &get, *len) != 0)
2034 ip_vs_service_put(svc);
/* destination list of one service; exact-length check as above */
2040 case IP_VS_SO_GET_DESTS:
2042 struct ip_vs_get_dests get;
2044 if (*len < sizeof(get)) {
2045 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2049 if (copy_from_user(&get, user, sizeof(get))) {
2053 if (*len != (sizeof(get) +
2054 sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2055 IP_VS_ERR("length: %u != %Zu\n", *len,
2056 sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2060 ret = __ip_vs_get_dest_entries(&get, user);
/* current protocol timeouts in seconds */
2064 case IP_VS_SO_GET_TIMEOUTS:
2066 struct ip_vs_timeout_user u;
2068 if (*len < sizeof(u)) {
2069 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2073 __ip_vs_get_timeouts(&u);
2074 if (copy_to_user(user, &u, sizeof(u)) != 0)
/* sync-daemon state and multicast interface names */
2079 case IP_VS_SO_GET_DAEMON:
2081 struct ip_vs_daemon_user u;
2083 if (*len < sizeof(u)) {
2084 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2088 u.state = ip_vs_sync_state;
2089 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2090 strncpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn, sizeof(u.mcast_master_ifn));
2091 u.mcast_master_ifn[sizeof(u.mcast_master_ifn) - 1] = 0;
2093 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2094 strncpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn, sizeof(u.mcast_backup_ifn));
2095 u.mcast_backup_ifn[sizeof(u.mcast_backup_ifn) - 1] = 0;
2097 if (copy_to_user(user, &u, sizeof(u)) != 0)
/*
 * Netfilter sockopt registration for the IPVS get/set control range.
 * Old-style positional initializer: {list}, pf, then set range
 * [IP_VS_BASE_CTL, IP_VS_SO_SET_MAX] -> do_ip_vs_set_ctl and get range
 * [IP_VS_BASE_CTL, IP_VS_SO_GET_MAX] -> do_ip_vs_get_ctl (the *_MAX+1
 * bounds are exclusive, matching the 2.4 nf_sockopt_ops contract).
 */
2112 static struct nf_sockopt_ops ip_vs_sockopts = {
2113 { NULL, NULL }, PF_INET,
2114 IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
2115 IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
/*
 * ip_vs_control_init - module-init path for the control plane.
 * Registers the netfilter sockopts, creates the two /proc entries,
 * registers the sysctl tree, initializes the service/real-server hash
 * tables, zeroes the global stats (attaching a rate estimator), and
 * arms the periodic defense timer.
 * NOTE(review): the error return after nf_register_sockopt failure and
 * the final 'return 0' are on elided lines; teardown order in
 * ip_vs_control_cleanup() mirrors this function in reverse.
 */
2119 int ip_vs_control_init(void)
2126 ret = nf_register_sockopt(&ip_vs_sockopts);
2128 IP_VS_ERR("cannot register sockopt.\n");
2132 proc_net_create("ip_vs", 0, ip_vs_get_info);
2133 proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2135 ipv4_vs_table.sysctl_header =
2136 register_sysctl_table(ipv4_vs_table.root_dir, 0);
2138 * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
2141 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2142 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2143 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2145 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
2146 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
/* zero the global counters and start their rate estimator */
2149 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2150 ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2151 ip_vs_new_estimator(&ip_vs_stats);
2153 /* Hook the defense timer */
2154 init_timer(&defense_timer);
2155 defense_timer.function = defense_timer_handler;
2156 defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2157 add_timer(&defense_timer);
/*
 * ip_vs_control_cleanup - module-exit path; undoes ip_vs_control_init
 * in reverse order: trash list, defense timer (synchronously, so no
 * handler is still running), stats estimator, sysctl tree, /proc
 * entries, and finally the netfilter sockopts.
 */
2163 void ip_vs_control_cleanup(void)
2166 ip_vs_trash_cleanup();
2167 del_timer_sync(&defense_timer);
2168 ip_vs_kill_estimator(&ip_vs_stats);
2169 unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2170 proc_net_remove("ip_vs_stats");
2171 proc_net_remove("ip_vs");
2172 nf_unregister_sockopt(&ip_vs_sockopts);