more changes on original files
[linux-2.4.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Version:     $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $
9  *
10  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
11  *              Peter Kese <peter.kese@ijs.si>
12  *              Julian Anastasov <ja@ssi.bg>
13  *
14  *              This program is free software; you can redistribute it and/or
15  *              modify it under the terms of the GNU General Public License
16  *              as published by the Free Software Foundation; either version
17  *              2 of the License, or (at your option) any later version.
18  *
19  * Changes:
20  *
21  */
22
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/sysctl.h>
28 #include <linux/proc_fs.h>
29 #include <linux/timer.h>
30 #include <linux/swap.h>
31 #include <linux/proc_fs.h>
32
33 #include <linux/netfilter.h>
34 #include <linux/netfilter_ipv4.h>
35
36 #include <net/ip.h>
37 #include <net/sock.h>
38
39 #include <asm/uaccess.h>
40
41 #include <net/ip_vs.h>
42
/*
 * Semaphore for the IPVS sockopts. A semaphore is used (rather than a
 * spinlock) because [gs]etsockopt may sleep.
 */
static DECLARE_MUTEX(__ip_vs_mutex);

/* lock for service table (the virtual service hash tables below) */
rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;

/* lock for table with the real services (ip_vs_rtable) */
static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;

/*
 * lock for state and timeout tables; held for writing around
 * ip_vs_secure_tcp_set() in update_defense_level()
 */
static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;

/* lock for drop entry handling (sysctl_ip_vs_drop_entry / ip_vs_dropentry) */
static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;

/* lock for drop packet handling (sysctl_ip_vs_drop_packet / ip_vs_drop_rate) */
static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;

/*
 * 1/rate drop and drop-entry variables.
 * They are recomputed by update_defense_level(); ip_vs_dropentry is the
 * flag the defense timer checks before calling ip_vs_random_dropentry().
 */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/*
 * sysctl variables
 *
 * drop_entry, drop_packet and secure_tcp are mode selectors interpreted by
 * update_defense_level():
 *   0 - strategy disabled
 *   1 - enable the strategy when available memory drops below
 *       amemthresh (the mode is then rewritten to 2)
 *   2 - strategy active because memory is low; rewritten back to 1 once
 *       memory is above amemthresh again
 *   3 - strategy always enabled
 * amemthresh is compared against free + buffer memory counted in pages;
 * am_droprate is the drop rate used by drop_packet mode 3.
 */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 2048;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold = 3;
int sysctl_ip_vs_nat_icmp_send = 0;

#ifdef CONFIG_IP_VS_DEBUG
/* current debug level; only present when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level = 0;

/* Accessor returning the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
#endif
89
90 /*
91  *      update_defense_level is called from timer bh and from sysctl.
92  */
93 static void update_defense_level(void)
94 {
95         struct sysinfo i;
96         int availmem;
97         int nomem;
98
99         /* we only count free and buffered memory (in pages) */
100         si_meminfo(&i);
101         availmem = i.freeram + i.bufferram;
102
103         nomem = (availmem < sysctl_ip_vs_amemthresh);
104
105         /* drop_entry */
106         spin_lock(&__ip_vs_dropentry_lock);
107         switch (sysctl_ip_vs_drop_entry) {
108         case 0:
109                 atomic_set(&ip_vs_dropentry, 0);
110                 break;
111         case 1:
112                 if (nomem) {
113                         atomic_set(&ip_vs_dropentry, 1);
114                         sysctl_ip_vs_drop_entry = 2;
115                 } else {
116                         atomic_set(&ip_vs_dropentry, 0);
117                 }
118                 break;
119         case 2:
120                 if (nomem) {
121                         atomic_set(&ip_vs_dropentry, 1);
122                 } else {
123                         atomic_set(&ip_vs_dropentry, 0);
124                         sysctl_ip_vs_drop_entry = 1;
125                 };
126                 break;
127         case 3:
128                 atomic_set(&ip_vs_dropentry, 1);
129                 break;
130         }
131         spin_unlock(&__ip_vs_dropentry_lock);
132
133         /* drop_packet */
134         spin_lock(&__ip_vs_droppacket_lock);
135         switch (sysctl_ip_vs_drop_packet) {
136         case 0:
137                 ip_vs_drop_rate = 0;
138                 break;
139         case 1:
140                 if (nomem) {
141                         ip_vs_drop_rate = ip_vs_drop_counter
142                                 = sysctl_ip_vs_amemthresh /
143                                 (sysctl_ip_vs_amemthresh - availmem);
144                         sysctl_ip_vs_drop_packet = 2;
145                 } else {
146                         ip_vs_drop_rate = 0;
147                 }
148                 break;
149         case 2:
150                 if (nomem) {
151                         ip_vs_drop_rate = ip_vs_drop_counter
152                                 = sysctl_ip_vs_amemthresh /
153                                 (sysctl_ip_vs_amemthresh - availmem);
154                 } else {
155                         ip_vs_drop_rate = 0;
156                         sysctl_ip_vs_drop_packet = 1;
157                 }
158                 break;
159         case 3:
160                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
161                 break;
162         }
163         spin_unlock(&__ip_vs_droppacket_lock);
164
165         /* secure_tcp */
166         write_lock(&__ip_vs_securetcp_lock);
167         switch (sysctl_ip_vs_secure_tcp) {
168         case 0:
169                 ip_vs_secure_tcp_set(0);
170                 break;
171         case 1:
172                 if (nomem) {
173                         ip_vs_secure_tcp_set(1);
174                         sysctl_ip_vs_secure_tcp = 2;
175                 } else {
176                         ip_vs_secure_tcp_set(0);
177                 }
178                 break;
179         case 2:
180                 if (nomem) {
181                         ip_vs_secure_tcp_set(1);
182                 } else {
183                         ip_vs_secure_tcp_set(0);
184                         sysctl_ip_vs_secure_tcp = 1;
185                 }
186                 break;
187         case 3:
188                 ip_vs_secure_tcp_set(1);
189                 break;
190         }
191         write_unlock(&__ip_vs_securetcp_lock);
192 }
193
194
195 /*
196  *      Timer for checking the defense
197  */
198 static struct timer_list defense_timer;
199 #define DEFENSE_TIMER_PERIOD    1*HZ
200
201 static void defense_timer_handler(unsigned long data)
202 {
203         update_defense_level();
204         if (atomic_read(&ip_vs_dropentry))
205                 ip_vs_random_dropentry();
206
207         mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
208 }
209
210
/*
 *  Hash table: for virtual service lookups
 *  (2^IP_VS_SVC_TAB_BITS buckets; IP_VS_SVC_TAB_MASK reduces a hash value
 *  to a bucket index)
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *  Hash table: for real service lookups
 *  (2^IP_VS_RTAB_BITS buckets, indexed through ip_vs_rs_hashkey())
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 * Trash for destinations: holds destinations that were removed from their
 * service but are still referenced (see ip_vs_trash_get_dest()).
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 * FTP & NULL virtual service counters.
 * ip_vs_service_get() only tries the FTP-port and port-zero fallback
 * lookups while the corresponding counter is non-zero.
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
242
243
244 /*
245  *  Returns hash value for virtual service
246  */
247 static __inline__ unsigned
248 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
249 {
250         register unsigned porth = ntohs(port);
251
252         return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
253                 & IP_VS_SVC_TAB_MASK;
254 }
255
256 /*
257  *  Returns hash value of fwmark for virtual service lookup
258  */
259 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
260 {
261         return fwmark & IP_VS_SVC_TAB_MASK;
262 }
263
264 /*
265  *  Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
266  *  or in the ip_vs_svc_fwm_table by fwmark.
267  *  Should be called with locked tables.
268  *  Returns bool success.
269  */
270 static int ip_vs_svc_hash(struct ip_vs_service *svc)
271 {
272         unsigned hash;
273
274         if (svc->flags & IP_VS_SVC_F_HASHED) {
275                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
276                           "called from %p\n", __builtin_return_address(0));
277                 return 0;
278         }
279
280         if (svc->fwmark == 0) {
281                 /*
282                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
283                  */
284                 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
285                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
286         } else {
287                 /*
288                  *  Hash it by fwmark in ip_vs_svc_fwm_table
289                  */
290                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
291                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
292         }
293
294         svc->flags |= IP_VS_SVC_F_HASHED;
295         /* increase its refcnt because it is referenced by the svc table */
296         atomic_inc(&svc->refcnt);
297         return 1;
298 }
299
300
301 /*
302  *  Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
303  *  Should be called with locked tables.
304  *  Returns bool success.
305  */
306 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
307 {
308         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
309                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
310                           "called from %p\n", __builtin_return_address(0));
311                 return 0;
312         }
313
314         if (svc->fwmark == 0) {
315                 /*
316                  * Remove it from the ip_vs_svc_table table.
317                  */
318                 list_del(&svc->s_list);
319         } else {
320                 /*
321                  * Remove it from the ip_vs_svc_fwm_table table.
322                  */
323                 list_del(&svc->f_list);
324         }
325
326         svc->flags &= ~IP_VS_SVC_F_HASHED;
327         atomic_dec(&svc->refcnt);
328         return 1;
329 }
330
331
332 /*
333  *  Get service by {proto,addr,port} in the service table.
334  */
335 static __inline__ struct ip_vs_service *
336 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
337 {
338         unsigned hash;
339         struct ip_vs_service *svc;
340         struct list_head *l,*e;
341
342         /*
343          *      Check for "full" addressed entries
344          */
345         hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
346
347         l = &ip_vs_svc_table[hash];
348         for (e=l->next; e!=l; e=e->next) {
349                 svc = list_entry(e, struct ip_vs_service, s_list);
350                 if ((svc->addr == vaddr)
351                     && (svc->port == vport)
352                     && (svc->protocol == protocol)) {
353                         /* HIT */
354                         atomic_inc(&svc->usecnt);
355                         return svc;
356                 }
357         }
358
359         return NULL;
360 }
361
362
363 /*
364  *  Get service by {fwmark} in the service table.
365  */
366 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
367 {
368         unsigned hash;
369         struct ip_vs_service *svc;
370         struct list_head *l,*e;
371
372         /*
373          *      Check for "full" addressed entries
374          */
375         hash = ip_vs_svc_fwm_hashkey(fwmark);
376
377         l = &ip_vs_svc_fwm_table[hash];
378         for (e=l->next; e!=l; e=e->next) {
379                 svc = list_entry(e, struct ip_vs_service, f_list);
380                 if (svc->fwmark == fwmark) {
381                         /* HIT */
382                         atomic_inc(&svc->usecnt);
383                         return svc;
384                 }
385         }
386
387         return NULL;
388 }
389
390 struct ip_vs_service *
391 ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
392 {
393         struct ip_vs_service *svc;
394
395         read_lock(&__ip_vs_svc_lock);
396
397         /*
398          *      Check the table hashed by fwmark first
399          */
400         if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
401                 goto out;
402
403         /*
404          *      Check the table hashed by <protocol,addr,port>
405          *      for "full" addressed entries
406          */
407         svc = __ip_vs_service_get(protocol, vaddr, vport);
408
409         if (svc == NULL
410             && protocol == IPPROTO_TCP
411             && atomic_read(&ip_vs_ftpsvc_counter)
412             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
413                 /*
414                  * Check if ftp service entry exists, the packet
415                  * might belong to FTP data connections.
416                  */
417                 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
418         }
419
420         if (svc == NULL
421             && atomic_read(&ip_vs_nullsvc_counter)) {
422                 /*
423                  * Check if the catch-all port (port zero) exists
424                  */
425                 svc = __ip_vs_service_get(protocol, vaddr, 0);
426         }
427
428   out:
429         read_unlock(&__ip_vs_svc_lock);
430
431         IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
432                   fwmark, ip_vs_proto_name(protocol),
433                   NIPQUAD(vaddr), ntohs(vport),
434                   svc?"hit":"not hit");
435
436         return svc;
437 }
438
439
440 static inline void
441 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
442 {
443         atomic_inc(&svc->refcnt);
444         dest->svc = svc;
445 }
446
447 static inline void
448 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
449 {
450         struct ip_vs_service *svc = dest->svc;
451
452         dest->svc = NULL;
453         if (atomic_dec_and_test(&svc->refcnt))
454                 kfree(svc);
455 }
456
457 /*
458  *  Returns hash value for real service
459  */
460 static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
461 {
462         register unsigned porth = ntohs(port);
463
464         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
465                 & IP_VS_RTAB_MASK;
466 }
467
468 /*
469  *  Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
470  *  should be called with locked tables.
471  *  returns bool success.
472  */
473 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
474 {
475         unsigned hash;
476
477         if (!list_empty(&dest->d_list)) {
478                 return 0;
479         }
480
481         /*
482          *      Hash by proto,addr,port,
483          *      which are the parameters of the real service.
484          */
485         hash = ip_vs_rs_hashkey(dest->addr, dest->port);
486         list_add(&dest->d_list, &ip_vs_rtable[hash]);
487
488         return 1;
489 }
490
491 /*
492  *  UNhashes ip_vs_dest from ip_vs_rtable.
493  *  should be called with locked tables.
494  *  returns bool success.
495  */
496 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
497 {
498         /*
499          * Remove it from the ip_vs_rtable table.
500          */
501         if (!list_empty(&dest->d_list)) {
502                 list_del(&dest->d_list);
503                 INIT_LIST_HEAD(&dest->d_list);
504         }
505
506         return 1;
507 }
508
509 /*
510  *  Lookup real service by {proto,addr,port} in the real service table.
511  */
512 struct ip_vs_dest *
513 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
514 {
515         unsigned hash;
516         struct ip_vs_dest *dest;
517         struct list_head *l,*e;
518
519         /*
520          *      Check for "full" addressed entries
521          *      Return the first found entry
522          */
523         hash = ip_vs_rs_hashkey(daddr, dport);
524
525         l = &ip_vs_rtable[hash];
526
527         read_lock(&__ip_vs_rs_lock);
528         for (e=l->next; e!=l; e=e->next) {
529                 dest = list_entry(e, struct ip_vs_dest, d_list);
530                 if ((dest->addr == daddr)
531                     && (dest->port == dport)
532                     && ((dest->protocol == protocol) ||
533                         dest->vfwmark)) {
534                         /* HIT */
535                         read_unlock(&__ip_vs_rs_lock);
536                         return dest;
537                 }
538         }
539         read_unlock(&__ip_vs_rs_lock);
540
541         return NULL;
542 }
543
544 /*
545  *  Lookup destination by {addr,port} in the given service
546  */
547 static struct ip_vs_dest *
548 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
549 {
550         struct ip_vs_dest *dest;
551         struct list_head *l, *e;
552
553         /*
554          * Find the destination for the given service
555          */
556         l = &svc->destinations;
557         for (e=l->next; e!=l; e=e->next) {
558                 dest = list_entry(e, struct ip_vs_dest, n_list);
559                 if ((dest->addr == daddr) && (dest->port == dport)) {
560                         /* HIT */
561                         return dest;
562                 }
563         }
564
565         return NULL;
566 }
567
568
569 /*
570  *  Lookup dest by {svc,addr,port} in the destination trash.
571  *  The destination trash is used to hold the destinations that are removed
572  *  from the service table but are still referenced by some conn entries.
573  *  The reason to add the destination trash is when the dest is temporary
574  *  down (either by administrator or by monitor program), the dest can be
575  *  picked back from the trash, the remaining connections to the dest can
576  *  continue, and the counting information of the dest is also useful for
577  *  scheduling.
578  */
579 static struct ip_vs_dest *
580 ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
581 {
582         struct ip_vs_dest *dest;
583         struct list_head *l, *e;
584
585         /*
586          * Find the destination in trash
587          */
588         l = &ip_vs_dest_trash;
589
590         for (e=l->next; e!=l; e=e->next) {
591                 dest = list_entry(e, struct ip_vs_dest, n_list);
592                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
593                           "refcnt=%d\n",
594                           dest->vfwmark,
595                           NIPQUAD(dest->addr), ntohs(dest->port),
596                           atomic_read(&dest->refcnt));
597                 if (dest->addr == daddr &&
598                     dest->port == dport &&
599                     dest->vfwmark == svc->fwmark &&
600                     dest->protocol == svc->protocol &&
601                     (svc->fwmark ||
602                      (dest->vaddr == svc->addr &&
603                       dest->vport == svc->port))) {
604                         /* HIT */
605                         return dest;
606                 }
607
608                 /*
609                  * Try to purge the destination from trash if not referenced
610                  */
611                 if (atomic_read(&dest->refcnt) == 1) {
612                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
613                                   "from trash\n",
614                                   dest->vfwmark,
615                                   NIPQUAD(dest->addr), ntohs(dest->port));
616                         e = e->prev;
617                         list_del(&dest->n_list);
618                         __ip_vs_dst_reset(dest);
619                         __ip_vs_unbind_svc(dest);
620                         kfree(dest);
621                 }
622         }
623
624         return NULL;
625 }
626
627
628 /*
629  *  Clean up all the destinations in the trash
630  *  Called by the ip_vs_control_cleanup()
631  *
632  *  When the ip_vs_control_clearup is activated by ipvs module exit,
633  *  the service tables must have been flushed and all the connections
634  *  are expired, and the refcnt of each destination in the trash must
635  *  be 1, so we simply release them here.
636  */
637 static void ip_vs_trash_cleanup(void)
638 {
639         struct ip_vs_dest *dest;
640         struct list_head *l;
641
642         l = &ip_vs_dest_trash;
643
644         while (l->next != l) {
645                 dest = list_entry(l->next, struct ip_vs_dest, n_list);
646                 list_del(&dest->n_list);
647                 __ip_vs_dst_reset(dest);
648                 __ip_vs_unbind_svc(dest);
649                 kfree(dest);
650         }
651 }
652
653
654 static inline void
655 __ip_vs_zero_stats(struct ip_vs_stats *stats)
656 {
657         spin_lock_bh(&stats->lock);
658         memset(stats, 0, (char *)&stats->lock - (char *)stats);
659         spin_unlock_bh(&stats->lock);
660         ip_vs_zero_estimator(stats);
661 }
662
663 /*
664  *  Update a destination in the given service
665  */
666 static void __ip_vs_update_dest(struct ip_vs_service *svc,
667                                 struct ip_vs_dest *dest,
668                                 struct ip_vs_rule_user *ur)
669 {
670         int conn_flags;
671
672         /*
673          *    Set the weight and the flags
674          */
675         atomic_set(&dest->weight, ur->weight);
676
677         conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
678
679         /*
680          *    Check if local node and update the flags
681          */
682         if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
683                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
684                         | IP_VS_CONN_F_LOCALNODE;
685         }
686
687         /*
688          *    Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
689          */
690         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
691                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
692         } else {
693                 /*
694                  *    Put the real service in ip_vs_rtable if not present.
695                  *    For now only for NAT!
696                  */
697                 write_lock_bh(&__ip_vs_rs_lock);
698                 ip_vs_rs_hash(dest);
699                 write_unlock_bh(&__ip_vs_rs_lock);
700         }
701         atomic_set(&dest->conn_flags, conn_flags);
702
703         /* bind the service */
704         if (!dest->svc) {
705                 __ip_vs_bind_svc(dest, svc);
706         } else {
707                 if (dest->svc != svc) {
708                         __ip_vs_unbind_svc(dest);
709                         __ip_vs_zero_stats(&dest->stats);
710                         __ip_vs_bind_svc(dest, svc);
711                 }
712         }
713
714         /* set the dest status flags */
715         dest->flags |= IP_VS_DEST_F_AVAILABLE;
716 }
717
718
719 /*
720  *  Create a destination for the given service
721  */
722 static int
723 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
724                struct ip_vs_dest **destp)
725 {
726         struct ip_vs_dest *dest;
727         unsigned atype;
728
729         EnterFunction(2);
730
731         atype = inet_addr_type(ur->daddr);
732         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
733                 return -EINVAL;
734
735         *destp = dest = (struct ip_vs_dest*)
736                 kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
737         if (dest == NULL) {
738                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
739                 return -ENOMEM;
740         }
741         memset(dest, 0, sizeof(struct ip_vs_dest));
742
743         dest->protocol = svc->protocol;
744         dest->vaddr = svc->addr;
745         dest->vport = svc->port;
746         dest->vfwmark = svc->fwmark;
747         dest->addr = ur->daddr;
748         dest->port = ur->dport;
749
750         atomic_set(&dest->activeconns, 0);
751         atomic_set(&dest->inactconns, 0);
752         atomic_set(&dest->refcnt, 0);
753
754         INIT_LIST_HEAD(&dest->d_list);
755         dest->dst_lock = SPIN_LOCK_UNLOCKED;
756         dest->stats.lock = SPIN_LOCK_UNLOCKED;
757         __ip_vs_update_dest(svc, dest, ur);
758         ip_vs_new_estimator(&dest->stats);
759
760         LeaveFunction(2);
761         return 0;
762 }
763
764
765 /*
766  *  Add a destination into an existing service
767  */
768 static int ip_vs_add_dest(struct ip_vs_service *svc,
769                           struct ip_vs_rule_user *ur)
770 {
771         struct ip_vs_dest *dest;
772         __u32 daddr = ur->daddr;
773         __u16 dport = ur->dport;
774         int ret;
775
776         EnterFunction(2);
777
778         if (ur->weight < 0) {
779                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
780                 return -ERANGE;
781         }
782
783         /*
784          * Check if the dest already exists in the list
785          */
786         dest = ip_vs_lookup_dest(svc, daddr, dport);
787         if (dest != NULL) {
788                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
789                 return -EEXIST;
790         }
791
792         /*
793          * Check if the dest already exists in the trash and
794          * is from the same service
795          */
796         dest = ip_vs_trash_get_dest(svc, daddr, dport);
797         if (dest != NULL) {
798                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
799                           "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
800                           NIPQUAD(daddr), ntohs(dport),
801                           atomic_read(&dest->refcnt),
802                           dest->vfwmark,
803                           NIPQUAD(dest->vaddr),
804                           ntohs(dest->vport));
805                 __ip_vs_update_dest(svc, dest, ur);
806
807                 /*
808                  * Get the destination from the trash
809                  */
810                 list_del(&dest->n_list);
811
812                 ip_vs_new_estimator(&dest->stats);
813
814                 write_lock_bh(&__ip_vs_svc_lock);
815
816                 /*
817                  * Wait until all other svc users go away.
818                  */
819                 while (atomic_read(&svc->usecnt) > 1) {};
820
821                 list_add(&dest->n_list, &svc->destinations);
822                 svc->num_dests++;
823
824                 /* call the update_service function of its scheduler */
825                 svc->scheduler->update_service(svc);
826
827                 write_unlock_bh(&__ip_vs_svc_lock);
828                 return 0;
829         }
830
831         /*
832          * Allocate and initialize the dest structure
833          */
834         ret = ip_vs_new_dest(svc, ur, &dest);
835         if (ret) {
836                 return ret;
837         }
838
839         /*
840          * Add the dest entry into the list
841          */
842         atomic_inc(&dest->refcnt);
843
844         write_lock_bh(&__ip_vs_svc_lock);
845
846         /*
847          * Wait until all other svc users go away.
848          */
849         while (atomic_read(&svc->usecnt) > 1) {};
850
851         list_add(&dest->n_list, &svc->destinations);
852         svc->num_dests++;
853
854         /* call the update_service function of its scheduler */
855         svc->scheduler->update_service(svc);
856
857         write_unlock_bh(&__ip_vs_svc_lock);
858
859         LeaveFunction(2);
860
861         return 0;
862 }
863
864
865 /*
866  *  Edit a destination in the given service
867  */
868 static int ip_vs_edit_dest(struct ip_vs_service *svc,
869                            struct ip_vs_rule_user *ur)
870 {
871         struct ip_vs_dest *dest;
872         __u32 daddr = ur->daddr;
873         __u16 dport = ur->dport;
874
875         EnterFunction(2);
876
877         if (ur->weight < 0) {
878                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
879                 return -ERANGE;
880         }
881
882         /*
883          *  Lookup the destination list
884          */
885         dest = ip_vs_lookup_dest(svc, daddr, dport);
886         if (dest == NULL) {
887                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
888                 return -ENOENT;
889         }
890
891         __ip_vs_update_dest(svc, dest, ur);
892
893         write_lock_bh(&__ip_vs_svc_lock);
894
895         /* Wait until all other svc users go away */
896         while (atomic_read(&svc->usecnt) > 1) {};
897
898         /* call the update_service, because server weight may be changed */
899         svc->scheduler->update_service(svc);
900
901         write_unlock_bh(&__ip_vs_svc_lock);
902
903         LeaveFunction(2);
904
905         return 0;
906 }
907
908
909 /*
910  *  Delete a destination (must be already unlinked from the service)
911  */
912 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
913 {
914         ip_vs_kill_estimator(&dest->stats);
915
916         /*
917          *  Remove it from the d-linked list with the real services.
918          */
919         write_lock_bh(&__ip_vs_rs_lock);
920         ip_vs_rs_unhash(dest);
921         write_unlock_bh(&__ip_vs_rs_lock);
922
923         /*
924          *  Decrease the refcnt of the dest, and free the dest
925          *  if nobody refers to it (refcnt=0). Otherwise, throw
926          *  the destination into the trash.
927          */
928         if (atomic_dec_and_test(&dest->refcnt)) {
929                 __ip_vs_dst_reset(dest);
930                 /* simply decrease svc->refcnt here, let the caller check
931                    and release the service if nobody refers to it.
932                    Only user context can release destination and service,
933                    and only one user context can update virtual service at a
934                    time, so the operation here is OK */
935                 atomic_dec(&dest->svc->refcnt);
936                 kfree(dest);
937         } else {
938                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
939                           NIPQUAD(dest->addr), ntohs(dest->port),
940                           atomic_read(&dest->refcnt));
941                 list_add(&dest->n_list, &ip_vs_dest_trash);
942                 atomic_inc(&dest->refcnt);
943         }
944 }
945
946
947 /*
948  *  Unlink a destination from the given service
949  */
950 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
951                                 struct ip_vs_dest *dest,
952                                 int svcupd)
953 {
954         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
955
956         /*
957          *  Remove it from the d-linked destination list.
958          */
959         list_del(&dest->n_list);
960         svc->num_dests--;
961         if (svcupd) {
962                 /*
963                  *  Call the update_service function of its scheduler
964                  */
965                 svc->scheduler->update_service(svc);
966         }
967 }
968
969
970 /*
971  *  Delete a destination server in the given service
972  */
973 static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
974 {
975         struct ip_vs_dest *dest;
976         __u32 daddr = ur->daddr;
977         __u16 dport = ur->dport;
978
979         EnterFunction(2);
980
981         dest = ip_vs_lookup_dest(svc, daddr, dport);
982         if (dest == NULL) {
983                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
984                 return -ENOENT;
985         }
986
987         write_lock_bh(&__ip_vs_svc_lock);
988
989         /*
990          *      Wait until all other svc users go away.
991          */
992         while (atomic_read(&svc->usecnt) > 1) {};
993
994         /*
995          *      Unlink dest from the service
996          */
997         __ip_vs_unlink_dest(svc, dest, 1);
998
999         write_unlock_bh(&__ip_vs_svc_lock);
1000
1001         /*
1002          *      Delete the destination
1003          */
1004         __ip_vs_del_dest(dest);
1005
1006         LeaveFunction(2);
1007
1008         return 0;
1009 }
1010
1011
1012 /*
1013  *  Add a service into the service hash table
1014  */
1015 static int
1016 ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
1017 {
1018         int ret = 0;
1019         struct ip_vs_scheduler *sched;
1020         struct ip_vs_service *svc = NULL;
1021
1022         MOD_INC_USE_COUNT;
1023
1024         /*
1025          * Lookup the scheduler, by 'ur->sched_name'
1026          */
1027         sched = ip_vs_scheduler_get(ur->sched_name);
1028         if (sched == NULL) {
1029                 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1030                            ur->sched_name);
1031                 ret = -ENOENT;
1032                 goto out_mod_dec;
1033         }
1034
1035         svc = (struct ip_vs_service*)
1036                 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1037         if (svc == NULL) {
1038                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1039                 ret = -ENOMEM;
1040                 goto out_err;
1041         }
1042         memset(svc, 0, sizeof(struct ip_vs_service));
1043
1044         svc->protocol = ur->protocol;
1045         svc->addr = ur->vaddr;
1046         svc->port = ur->vport;
1047         svc->fwmark = ur->vfwmark;
1048         svc->flags = ur->vs_flags;
1049         svc->timeout = ur->timeout * HZ;
1050         svc->netmask = ur->netmask;
1051
1052         INIT_LIST_HEAD(&svc->destinations);
1053         svc->sched_lock = RW_LOCK_UNLOCKED;
1054         svc->stats.lock = SPIN_LOCK_UNLOCKED;
1055
1056         /*
1057          *    Bind the scheduler
1058          */
1059         ret = ip_vs_bind_scheduler(svc, sched);
1060         if (ret) {
1061                 goto out_err;
1062         }
1063
1064         /*
1065          *    Update the virtual service counters
1066          */
1067         if (svc->port == FTPPORT)
1068                 atomic_inc(&ip_vs_ftpsvc_counter);
1069         else if (svc->port == 0)
1070                 atomic_inc(&ip_vs_nullsvc_counter);
1071
1072         /*
1073          *    I'm the first user of the service
1074          */
1075         atomic_set(&svc->usecnt, 1);
1076         atomic_set(&svc->refcnt, 0);
1077
1078         ip_vs_new_estimator(&svc->stats);
1079         ip_vs_num_services++;
1080
1081         /*
1082          *    Hash the service into the service table
1083          */
1084         write_lock_bh(&__ip_vs_svc_lock);
1085         ip_vs_svc_hash(svc);
1086         write_unlock_bh(&__ip_vs_svc_lock);
1087
1088         *svc_p = svc;
1089         return 0;
1090
1091   out_err:
1092         if (svc)
1093                 kfree(svc);
1094         ip_vs_scheduler_put(sched);
1095   out_mod_dec:
1096         MOD_DEC_USE_COUNT;
1097         return ret;
1098 }
1099
1100
1101 /*
1102  *      Edit a service and bind it with a new scheduler
1103  */
1104 static int ip_vs_edit_service(struct ip_vs_service *svc,
1105                               struct ip_vs_rule_user *ur)
1106 {
1107         struct ip_vs_scheduler *sched, *old_sched;
1108         int ret = 0;
1109
1110         /*
1111          * Lookup the scheduler, by 'ur->sched_name'
1112          */
1113         sched = ip_vs_scheduler_get(ur->sched_name);
1114         if (sched == NULL) {
1115                 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1116                            ur->sched_name);
1117                 return -ENOENT;
1118         }
1119
1120         write_lock_bh(&__ip_vs_svc_lock);
1121
1122         /*
1123          * Wait until all other svc users go away.
1124          */
1125         while (atomic_read(&svc->usecnt) > 1) {};
1126
1127         /*
1128          * Set the flags and timeout value
1129          */
1130         svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
1131         svc->timeout = ur->timeout * HZ;
1132         svc->netmask = ur->netmask;
1133
1134         old_sched = svc->scheduler;
1135         if (sched != old_sched) {
1136                 /*
1137                  * Unbind the old scheduler
1138                  */
1139                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1140                         old_sched = sched;
1141                         goto out;
1142                 }
1143
1144                 /*
1145                  * Bind the new scheduler
1146                  */
1147                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1148                         /*
1149                          * If ip_vs_bind_scheduler fails, restore the old
1150                          * scheduler.
1151                          * The main reason of failure is out of memory.
1152                          *
1153                          * The question is if the old scheduler can be
1154                          * restored all the time. TODO: if it cannot be
1155                          * restored some time, we must delete the service,
1156                          * otherwise the system may crash.
1157                          */
1158                         ip_vs_bind_scheduler(svc, old_sched);
1159                         old_sched = sched;
1160                 }
1161         }
1162
1163   out:
1164         write_unlock_bh(&__ip_vs_svc_lock);
1165
1166         if (old_sched)
1167                 ip_vs_scheduler_put(old_sched);
1168
1169         return ret;
1170 }
1171
1172
1173 /*
1174  *  Delete a service from the service list
1175  *  The service must be unlinked, unlocked and not referenced!
1176  */
1177 static void __ip_vs_del_service(struct ip_vs_service *svc)
1178 {
1179         struct list_head *l;
1180         struct ip_vs_dest *dest;
1181         struct ip_vs_scheduler *old_sched;
1182
1183         ip_vs_num_services--;
1184         ip_vs_kill_estimator(&svc->stats);
1185
1186         /*
1187          *    Unbind scheduler
1188          */
1189         old_sched = svc->scheduler;
1190         ip_vs_unbind_scheduler(svc);
1191         if (old_sched && old_sched->module)
1192                 __MOD_DEC_USE_COUNT(old_sched->module);
1193
1194         /*
1195          *    Unlink the whole destination list
1196          */
1197         l = &svc->destinations;
1198         while (l->next != l) {
1199                 dest = list_entry(l->next, struct ip_vs_dest, n_list);
1200                 __ip_vs_unlink_dest(svc, dest, 0);
1201                 __ip_vs_del_dest(dest);
1202         }
1203
1204         /*
1205          *    Update the virtual service counters
1206          */
1207         if (svc->port == FTPPORT)
1208                 atomic_dec(&ip_vs_ftpsvc_counter);
1209         else if (svc->port == 0)
1210                 atomic_dec(&ip_vs_nullsvc_counter);
1211
1212         /*
1213          *    Free the service if nobody refers to it
1214          */
1215         if (atomic_read(&svc->refcnt) == 0)
1216                 kfree(svc);
1217         MOD_DEC_USE_COUNT;
1218 }
1219
1220 /*
1221  *  Delete a service from the service list
1222  */
1223 static int ip_vs_del_service(struct ip_vs_service *svc)
1224 {
1225         if (svc == NULL)
1226                 return -EEXIST;
1227
1228         /*
1229          * Unhash it from the service table
1230          */
1231         write_lock_bh(&__ip_vs_svc_lock);
1232
1233         ip_vs_svc_unhash(svc);
1234
1235         /*
1236          * Wait until all the svc users go away.
1237          */
1238         while (atomic_read(&svc->usecnt) > 1) {};
1239
1240         __ip_vs_del_service(svc);
1241
1242         write_unlock_bh(&__ip_vs_svc_lock);
1243
1244         return 0;
1245 }
1246
1247
1248 /*
1249  *  Flush all the virtual services
1250  */
1251 static int ip_vs_flush(void)
1252 {
1253         int idx;
1254         struct ip_vs_service *svc;
1255         struct list_head *l;
1256
1257         /*
1258          * Flush the service table hashed by <protocol,addr,port>
1259          */
1260         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261                 l = &ip_vs_svc_table[idx];
1262                 while (l->next != l) {
1263                         svc = list_entry(l->next,struct ip_vs_service,s_list);
1264                         write_lock_bh(&__ip_vs_svc_lock);
1265                         ip_vs_svc_unhash(svc);
1266                         /*
1267                          * Wait until all the svc users go away.
1268                          */
1269                         while (atomic_read(&svc->usecnt) > 0) {};
1270                         __ip_vs_del_service(svc);
1271                         write_unlock_bh(&__ip_vs_svc_lock);
1272                 }
1273         }
1274
1275         /*
1276          * Flush the service table hashed by fwmark
1277          */
1278         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279                 l = &ip_vs_svc_fwm_table[idx];
1280                 while (l->next != l) {
1281                         svc = list_entry(l->next,struct ip_vs_service,f_list);
1282                         write_lock_bh(&__ip_vs_svc_lock);
1283                         ip_vs_svc_unhash(svc);
1284                         /*
1285                          * Wait until all the svc users go away.
1286                          */
1287                         while (atomic_read(&svc->usecnt) > 0) {};
1288                         __ip_vs_del_service(svc);
1289                         write_unlock_bh(&__ip_vs_svc_lock);
1290                 }
1291         }
1292
1293         return 0;
1294 }
1295
1296
1297 /*
1298  *  Zero counters in a service or all services
1299  */
1300 static int ip_vs_zero_service(struct ip_vs_service *svc)
1301 {
1302         struct list_head *l;
1303         struct ip_vs_dest *dest;
1304
1305         write_lock_bh(&__ip_vs_svc_lock);
1306         list_for_each (l, &svc->destinations) {
1307                 dest = list_entry(l, struct ip_vs_dest, n_list);
1308                 __ip_vs_zero_stats(&dest->stats);
1309         }
1310         __ip_vs_zero_stats(&svc->stats);
1311         write_unlock_bh(&__ip_vs_svc_lock);
1312         return 0;
1313 }
1314
1315 static int ip_vs_zero_all(void)
1316 {
1317         int idx;
1318         struct list_head *l;
1319         struct ip_vs_service *svc;
1320
1321         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322                 list_for_each (l, &ip_vs_svc_table[idx]) {
1323                         svc = list_entry(l, struct ip_vs_service, s_list);
1324                         ip_vs_zero_service(svc);
1325                 }
1326         }
1327
1328         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329                 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330                         svc = list_entry(l, struct ip_vs_service, f_list);
1331                         ip_vs_zero_service(svc);
1332                 }
1333         }
1334
1335         __ip_vs_zero_stats(&ip_vs_stats);
1336         return 0;
1337 }
1338
1339
1340 static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
1341         struct file * filp, void *buffer, size_t *lenp)
1342 {
1343         int *valp = ctl->data;
1344         int val = *valp;
1345         int ret;
1346
1347         ret = proc_dointvec(ctl, write, filp, buffer, lenp);
1348         if (write && (*valp != val)) {
1349                 if ((*valp < 0) || (*valp > 3)) {
1350                         /* Restore the correct value */
1351                         *valp = val;
1352                 } else {
1353                         local_bh_disable();
1354                         update_defense_level();
1355                         local_bh_enable();
1356                 }
1357         }
1358         return ret;
1359 }
1360
1361
/*
 *      IPVS sysctl table
 *
 *      Bundles the tables that make up the IPVS sysctl tree together
 *      with a header pointer (presumably the handle obtained when the
 *      tree is registered -- registration is not part of this excerpt).
 */
struct ip_vs_sysctl_table {
        struct ctl_table_header *sysctl_header; /* starts out NULL in ipv4_vs_table */
        ctl_table vs_vars[NET_IPV4_VS_LAST];    /* the IPVS variables themselves */
        ctl_table vs_dir[2];                    /* "vs" entry + terminator */
        ctl_table ipv4_dir[2];                  /* "ipv4" entry + terminator */
        ctl_table root_dir[2];                  /* "net" entry + terminator */
};
1372
1373
/*
 *      The one instance of the IPVS sysctl table.
 *
 *      The initializers are positional.  Each vs_vars entry gives, in
 *      order: the sysctl id, the entry name, the address of the
 *      variable, its size, the access mode, NULL, and the handler.
 *      The three directory entries at the end chain "net" -> "ipv4" ->
 *      "vs" -> vs_vars through their sixth initializer.  Every array
 *      is closed by a {0} entry.
 */
static struct ip_vs_sysctl_table ipv4_vs_table = {
        NULL,
        {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
          &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
          &proc_dointvec},
#ifdef CONFIG_IP_VS_DEBUG
         {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
          &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
          &proc_dointvec},
#endif
         {NET_IPV4_VS_AMDROPRATE, "am_droprate",
          &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
          &proc_dointvec},
         /* the next three go through ip_vs_sysctl_defense_mode */
         {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
          &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
          &ip_vs_sysctl_defense_mode},
         {NET_IPV4_VS_DROP_PACKET, "drop_packet",
          &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
          &ip_vs_sysctl_defense_mode},
         {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
          &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
          &ip_vs_sysctl_defense_mode},
         /* per-state timeouts of vs_timeout_table_dos, via proc_dointvec_jiffies */
         {NET_IPV4_VS_TO_ES, "timeout_established",
          &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_SS, "timeout_synsent",
          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_SR, "timeout_synrecv",
          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_FW, "timeout_finwait",
          &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_TW, "timeout_timewait",
          &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_CL, "timeout_close",
          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_CW, "timeout_closewait",
          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_LA, "timeout_lastack",
          &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_LI, "timeout_listen",
          &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_SA, "timeout_synack",
          &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_UDP, "timeout_udp",
          &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
          &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
         {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
          &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
          &proc_dointvec},
         {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
          &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
          &proc_dointvec},
         {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
          &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
          &proc_dointvec},
         {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
          &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
          &proc_dointvec},
         {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
          &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
          &proc_dointvec},
         {0}},
        /* directory entries: "vs" under "ipv4" under "net" */
        {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
         {0}},
        {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
         {0}},
        {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
         {0}}
};
1455
1456
1457 /*
1458  *      Write the contents of the VS rule table to a PROCfs file.
1459  *      (It is kept just for backward compatibility)
1460  */
1461 static inline char *ip_vs_fwd_name(unsigned flags)
1462 {
1463         char *fwd;
1464
1465         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466         case IP_VS_CONN_F_LOCALNODE:
1467                 fwd = "Local";
1468                 break;
1469         case IP_VS_CONN_F_TUNNEL:
1470                 fwd = "Tunnel";
1471                 break;
1472         case IP_VS_CONN_F_DROUTE:
1473                 fwd = "Route";
1474                 break;
1475         default:
1476                 fwd = "Masq";
1477         }
1478         return fwd;
1479 }
1480
1481 static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
1482 {
1483         int len=0;
1484         off_t pos=0;
1485         char temp[64], temp2[32];
1486         int idx;
1487         struct ip_vs_service *svc;
1488         struct ip_vs_dest *dest;
1489         struct list_head *l, *e, *p, *q;
1490
1491         /*
1492          * Note: since the length of the buffer is usually the multiple
1493          * of 512, it is good to use fixed record of the divisor of 512,
1494          * so that records won't be truncated at buffer boundary.
1495          */
1496         pos = 192;
1497         if (pos > offset) {
1498                 sprintf(temp,
1499                         "IP Virtual Server version %d.%d.%d (size=%d)",
1500                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1501                 len += sprintf(buf+len, "%-63s\n", temp);
1502                 len += sprintf(buf+len, "%-63s\n",
1503                                "Prot LocalAddress:Port Scheduler Flags");
1504                 len += sprintf(buf+len, "%-63s\n",
1505                                "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
1506         }
1507
1508         read_lock_bh(&__ip_vs_svc_lock);
1509
1510         /* print the service table hashed by <protocol,addr,port> */
1511         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1512                 l = &ip_vs_svc_table[idx];
1513                 for (e=l->next; e!=l; e=e->next) {
1514                         svc = list_entry(e, struct ip_vs_service, s_list);
1515                         pos += 64;
1516                         if (pos > offset) {
1517                                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1518                                         sprintf(temp2, "persistent %d %08X",
1519                                                 svc->timeout,
1520                                                 ntohl(svc->netmask));
1521                                 else
1522                                         temp2[0] = '\0';
1523
1524                                 sprintf(temp, "%s  %08X:%04X %s %s",
1525                                         ip_vs_proto_name(svc->protocol),
1526                                         ntohl(svc->addr),
1527                                         ntohs(svc->port),
1528                                         svc->scheduler->name, temp2);
1529                                 len += sprintf(buf+len, "%-63s\n", temp);
1530                                 if (len >= length)
1531                                         goto done;
1532                         }
1533
1534                         p = &svc->destinations;
1535                         for (q=p->next; q!=p; q=q->next) {
1536                                 dest = list_entry(q, struct ip_vs_dest, n_list);
1537                                 pos += 64;
1538                                 if (pos <= offset)
1539                                         continue;
1540                                 sprintf(temp,
1541                                         "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
1542                                         ntohl(dest->addr),
1543                                         ntohs(dest->port),
1544                                         ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1545                                         atomic_read(&dest->weight),
1546                                         atomic_read(&dest->activeconns),
1547                                         atomic_read(&dest->inactconns));
1548                                 len += sprintf(buf+len, "%-63s\n", temp);
1549                                 if (len >= length)
1550                                         goto done;
1551                         }
1552                 }
1553         }
1554
1555         /* print the service table hashed by fwmark */
1556         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1557                 l = &ip_vs_svc_fwm_table[idx];
1558                 for (e=l->next; e!=l; e=e->next) {
1559                         svc = list_entry(e, struct ip_vs_service, f_list);
1560                         pos += 64;
1561                         if (pos > offset) {
1562                                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1563                                         sprintf(temp2, "persistent %d %08X",
1564                                                 svc->timeout,
1565                                                 ntohl(svc->netmask));
1566                                 else
1567                                         temp2[0] = '\0';
1568
1569                                 sprintf(temp, "FWM  %08X %s %s",
1570                                         svc->fwmark,
1571                                         svc->scheduler->name, temp2);
1572                                 len += sprintf(buf+len, "%-63s\n", temp);
1573                                 if (len >= length)
1574                                         goto done;
1575                         }
1576
1577                         p = &svc->destinations;
1578                         for (q=p->next; q!=p; q=q->next) {
1579                                 dest = list_entry(q, struct ip_vs_dest, n_list);
1580                                 pos += 64;
1581                                 if (pos <= offset)
1582                                         continue;
1583                                 sprintf(temp,
1584                                         "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
1585                                         ntohl(dest->addr),
1586                                         ntohs(dest->port),
1587                                         ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1588                                         atomic_read(&dest->weight),
1589                                         atomic_read(&dest->activeconns),
1590                                         atomic_read(&dest->inactconns));
1591                                 len += sprintf(buf+len, "%-63s\n", temp);
1592                                 if (len >= length)
1593                                         goto done;
1594                         }
1595                 }
1596         }
1597
1598   done:
1599         read_unlock_bh(&__ip_vs_svc_lock);
1600
1601         *start = buf+len-(pos-offset);          /* Start of wanted data */
1602         len = pos-offset;
1603         if (len > length)
1604                 len = length;
1605         if (len < 0)
1606                 len = 0;
1607         return len;
1608 }
1609
1610
1611 struct ip_vs_stats ip_vs_stats;
1612
1613 static int
1614 ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1615 {
1616         int len=0;
1617         off_t pos=0;
1618         char temp[64];
1619
1620         pos += 320;
1621         if (pos > offset) {
1622                 len += sprintf(buf+len, "%-63s\n%-63s\n",
1623 /*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1624                                "   Total Incoming Outgoing         Incoming         Outgoing",
1625                                "   Conns  Packets  Packets            Bytes            Bytes");
1626
1627                 spin_lock_bh(&ip_vs_stats.lock);
1628                 sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1629                         ip_vs_stats.conns,
1630                         ip_vs_stats.inpkts,
1631                         ip_vs_stats.outpkts,
1632                         (__u32)(ip_vs_stats.inbytes>>32),
1633                         (__u32)ip_vs_stats.inbytes,
1634                         (__u32)(ip_vs_stats.outbytes>>32),
1635                         (__u32)ip_vs_stats.outbytes);
1636                 len += sprintf(buf+len, "%-62s\n\n", temp);
1637
1638                 len += sprintf(buf+len, "%-63s\n",
1639 /*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1640                                " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
1641                 sprintf(temp, "%8X %8X %8X %16X %16X",
1642                         ip_vs_stats.cps,
1643                         ip_vs_stats.inpps,
1644                         ip_vs_stats.outpps,
1645                         ip_vs_stats.inbps,
1646                         ip_vs_stats.outbps);
1647                 len += sprintf(buf+len, "%-63s\n", temp);
1648
1649                 spin_unlock_bh(&ip_vs_stats.lock);
1650         }
1651
1652         *start = buf+len-(pos-offset);          /* Start of wanted data */
1653         len = pos-offset;
1654         if (len > length)
1655                 len = length;
1656         if (len < 0)
1657                 len = 0;
1658         return len;
1659 }
1660
1661
1662 /*
1663  * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
1664  */
1665 static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1666 {
1667         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1668                   u->tcp_timeout,
1669                   u->tcp_fin_timeout,
1670                   u->udp_timeout);
1671
1672         if (u->tcp_timeout) {
1673                 vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674                         = u->tcp_timeout * HZ;
1675         }
1676
1677         if (u->tcp_fin_timeout) {
1678                 vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679                         = u->tcp_fin_timeout * HZ;
1680         }
1681
1682         if (u->udp_timeout) {
1683                 vs_timeout_table.timeout[IP_VS_S_UDP]
1684                         = u->udp_timeout * HZ;
1685         }
1686         return 0;
1687 }
1688
1689
1690 static int
1691 do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1692 {
1693         int ret;
1694         struct ip_vs_rule_user *urule;
1695         struct ip_vs_service *svc = NULL;
1696
1697         if (!capable(CAP_NET_ADMIN))
1698                 return -EPERM;
1699
1700         /*
1701          * Check the size of mm, no overflow...
1702          * len > 128000 is a sanity check.
1703          */
1704         if (len < sizeof(struct ip_vs_rule_user)) {
1705                 IP_VS_ERR("set_ctl: len %u < %Zu\n",
1706                           len, sizeof(struct ip_vs_rule_user));
1707                 return -EINVAL;
1708         } else if (len > 128000) {
1709                 IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1710                 return -EINVAL;
1711         } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1712                 IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1713                 return -ENOMEM;
1714         } else if (copy_from_user(urule, user, len) != 0) {
1715                 ret = -EFAULT;
1716                 goto out_free;
1717         }
1718
1719         MOD_INC_USE_COUNT;
1720         if (down_interruptible(&__ip_vs_mutex)) {
1721                 ret = -ERESTARTSYS;
1722                 goto out_dec;
1723         }
1724
1725         if (cmd == IP_VS_SO_SET_FLUSH) {
1726                 /* Flush the virtual service */
1727                 ret = ip_vs_flush();
1728                 goto out_unlock;
1729         } else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1730                 /* Set timeout values for (tcp tcpfin udp) */
1731                 ret = ip_vs_set_timeouts(urule);
1732                 goto out_unlock;
1733         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1734                 ret = start_sync_thread(urule->state, urule->mcast_ifn,
1735                                         urule->syncid);
1736                 goto out_unlock;
1737         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1738                 ret = stop_sync_thread(urule->state);
1739                 goto out_unlock;
1740         } else if (cmd == IP_VS_SO_SET_ZERO) {
1741                 /* if no service address is set, zero counters in all */
1742                 if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1743                         ret = ip_vs_zero_all();
1744                         goto out_unlock;
1745                 }
1746         }
1747
1748         /*
1749          * Check for valid protocol: TCP or UDP. Even for fwmark!=0
1750          */
1751         if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1752                 IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1753                           urule->protocol, NIPQUAD(urule->vaddr),
1754                           ntohs(urule->vport), urule->sched_name);
1755                 ret = -EFAULT;
1756                 goto out_unlock;
1757         }
1758
1759         /*
1760          * Lookup the exact service by <protocol, vaddr, vport> or fwmark
1761          */
1762         if (urule->vfwmark == 0)
1763                 svc = __ip_vs_service_get(urule->protocol,
1764                                           urule->vaddr, urule->vport);
1765         else
1766                 svc = __ip_vs_svc_fwm_get(urule->vfwmark);
1767
1768         if (cmd != IP_VS_SO_SET_ADD
1769             && (svc == NULL || svc->protocol != urule->protocol)) {
1770                 ret = -ESRCH;
1771                 goto out_unlock;
1772         }
1773
1774         switch (cmd) {
1775         case IP_VS_SO_SET_ADD:
1776                 if (svc != NULL)
1777                         ret = -EEXIST;
1778                 else
1779                         ret = ip_vs_add_service(urule, &svc);
1780                 break;
1781         case IP_VS_SO_SET_EDIT:
1782                 ret = ip_vs_edit_service(svc, urule);
1783                 break;
1784         case IP_VS_SO_SET_DEL:
1785                 ret = ip_vs_del_service(svc);
1786                 if (!ret)
1787                         goto out_unlock;
1788                 break;
1789         case IP_VS_SO_SET_ADDDEST:
1790                 ret = ip_vs_add_dest(svc, urule);
1791                 break;
1792         case IP_VS_SO_SET_EDITDEST:
1793                 ret = ip_vs_edit_dest(svc, urule);
1794                 break;
1795         case IP_VS_SO_SET_DELDEST:
1796                 ret = ip_vs_del_dest(svc, urule);
1797                 break;
1798         case IP_VS_SO_SET_ZERO:
1799                 ret = ip_vs_zero_service(svc);
1800                 break;
1801         default:
1802                 ret = -EINVAL;
1803         }
1804
1805         if (svc)
1806                 ip_vs_service_put(svc);
1807
1808   out_unlock:
1809         up(&__ip_vs_mutex);
1810   out_dec:
1811         MOD_DEC_USE_COUNT;
1812   out_free:
1813         kfree(urule);
1814         return ret;
1815 }
1816
1817
1818 static inline void
1819 __ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1820 {
1821         spin_lock_bh(&src->lock);
1822         memcpy(dst, src, (char*)&src->lock - (char*)src);
1823         spin_unlock_bh(&src->lock);
1824 }
1825
1826 static inline int
1827 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828                             struct ip_vs_get_services *uptr)
1829 {
1830         int idx, count=0;
1831         struct ip_vs_service *svc;
1832         struct list_head *l;
1833         struct ip_vs_service_user entry;
1834         int ret = 0;
1835
1836         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837                 list_for_each (l, &ip_vs_svc_table[idx]) {
1838                         if (count >= get->num_services)
1839                                 goto out;
1840                         svc = list_entry(l, struct ip_vs_service, s_list);
1841                         entry.protocol = svc->protocol;
1842                         entry.addr = svc->addr;
1843                         entry.port = svc->port;
1844                         entry.fwmark = svc->fwmark;
1845                         strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1846                         entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1847                         entry.flags = svc->flags;
1848                         entry.timeout = svc->timeout / HZ;
1849                         entry.netmask = svc->netmask;
1850                         entry.num_dests = svc->num_dests;
1851                         __ip_vs_copy_stats(&entry.stats, &svc->stats);
1852                         if (copy_to_user(&uptr->entrytable[count],
1853                                          &entry, sizeof(entry))) {
1854                                 ret = -EFAULT;
1855                                 goto out;
1856                         }
1857                         count++;
1858                 }
1859         }
1860
1861         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1862                 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1863                         if (count >= get->num_services)
1864                                 goto out;
1865                         svc = list_entry(l, struct ip_vs_service, f_list);
1866                         entry.protocol = svc->protocol;
1867                         entry.addr = svc->addr;
1868                         entry.port = svc->port;
1869                         entry.fwmark = svc->fwmark;
1870                         strncpy(entry.sched_name, svc->scheduler->name, sizeof(entry.sched_name));
1871                         entry.sched_name[sizeof(entry.sched_name) - 1] = 0;
1872                         entry.flags = svc->flags;
1873                         entry.timeout = svc->timeout / HZ;
1874                         entry.netmask = svc->netmask;
1875                         entry.num_dests = svc->num_dests;
1876                         __ip_vs_copy_stats(&entry.stats, &svc->stats);
1877                         if (copy_to_user(&uptr->entrytable[count],
1878                                          &entry, sizeof(entry))) {
1879                                 ret = -EFAULT;
1880                                 goto out;
1881                         }
1882                         count++;
1883                 }
1884         }
1885  out:
1886         return ret;
1887 }
1888
1889 static inline int
1890 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1891                          struct ip_vs_get_dests *uptr)
1892 {
1893         struct ip_vs_service *svc;
1894         int ret = 0;
1895
1896         if (get->fwmark)
1897                 svc = __ip_vs_svc_fwm_get(get->fwmark);
1898         else
1899                 svc = __ip_vs_service_get(get->protocol,
1900                                           get->addr, get->port);
1901         if (svc) {
1902                 int count = 0;
1903                 struct ip_vs_dest *dest;
1904                 struct list_head *l, *e;
1905                 struct ip_vs_dest_user entry;
1906
1907                 l = &svc->destinations;
1908                 for (e=l->next; e!=l; e=e->next) {
1909                         if (count >= get->num_dests)
1910                                 break;
1911                         dest = list_entry(e, struct ip_vs_dest, n_list);
1912                         entry.addr = dest->addr;
1913                         entry.port = dest->port;
1914                         entry.flags = atomic_read(&dest->conn_flags);
1915                         entry.weight = atomic_read(&dest->weight);
1916                         entry.activeconns = atomic_read(&dest->activeconns);
1917                         entry.inactconns = atomic_read(&dest->inactconns);
1918                         __ip_vs_copy_stats(&entry.stats, &dest->stats);
1919                         if (copy_to_user(&uptr->entrytable[count],
1920                                          &entry, sizeof(entry))) {
1921                                 ret = -EFAULT;
1922                                 break;
1923                         }
1924                         count++;
1925                 }
1926                 ip_vs_service_put(svc);
1927         } else
1928                 ret = -ESRCH;
1929         return ret;
1930 }
1931
1932 static inline void
1933 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1934 {
1935         u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1936         u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1937         u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1938 }
1939
1940 static int
1941 do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1942 {
1943         int ret = 0;
1944
1945         if (!capable(CAP_NET_ADMIN))
1946                 return -EPERM;
1947
1948         if (down_interruptible(&__ip_vs_mutex))
1949                 return -ERESTARTSYS;
1950
1951         switch (cmd) {
1952         case IP_VS_SO_GET_VERSION:
1953         {
1954                 char buf[64];
1955
1956                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1957                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1958                 if (*len < strlen(buf)+1) {
1959                         ret = -EINVAL;
1960                         goto out;
1961                 }
1962                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1963                         ret = -EFAULT;
1964                         goto out;
1965                 }
1966                 *len = strlen(buf)+1;
1967         }
1968         break;
1969
1970         case IP_VS_SO_GET_INFO:
1971         {
1972                 struct ip_vs_getinfo info;
1973                 info.version = IP_VS_VERSION_CODE;
1974                 info.size = IP_VS_CONN_TAB_SIZE;
1975                 info.num_services = ip_vs_num_services;
1976                 if (copy_to_user(user, &info, sizeof(info)) != 0)
1977                         ret = -EFAULT;
1978         }
1979         break;
1980
1981         case IP_VS_SO_GET_SERVICES:
1982         {
1983                 struct ip_vs_get_services get;
1984
1985                 if (*len < sizeof(get)) {
1986                         IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1987                         ret = -EINVAL;
1988                         goto out;
1989                 }
1990                 if (copy_from_user(&get, user, sizeof(get))) {
1991                         ret = -EFAULT;
1992                         goto out;
1993                 }
1994                 if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1995                         IP_VS_ERR("length: %u != %Zu\n", *len,
1996                                   sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
1997                         ret = -EINVAL;
1998                         goto out;
1999                 }
2000                 ret = __ip_vs_get_service_entries(&get, user);
2001         }
2002         break;
2003
2004         case IP_VS_SO_GET_SERVICE:
2005         {
2006                 struct ip_vs_service_user get;
2007                 struct ip_vs_service *svc;
2008
2009                 if (*len != sizeof(get)) {
2010                         IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2011                         ret = -EINVAL;
2012                         goto out;
2013                 }
2014                 if (copy_from_user(&get, user, sizeof(get))) {
2015                         ret = -EFAULT;
2016                         goto out;
2017                 }
2018
2019                 if (get.fwmark)
2020                         svc = __ip_vs_svc_fwm_get(get.fwmark);
2021                 else
2022                         svc = __ip_vs_service_get(get.protocol,
2023                                                   get.addr, get.port);
2024                 if (svc) {
2025                         strncpy(get.sched_name, svc->scheduler->name, sizeof(get.sched_name));
2026                         get.sched_name[sizeof(get.sched_name) - 1] = 0;
2027                         get.flags = svc->flags;
2028                         get.timeout = svc->timeout / HZ;
2029                         get.netmask = svc->netmask;
2030                         get.num_dests = svc->num_dests;
2031                         __ip_vs_copy_stats(&get.stats, &svc->stats);
2032                         if (copy_to_user(user, &get, *len) != 0)
2033                                 ret = -EFAULT;
2034                         ip_vs_service_put(svc);
2035                 } else
2036                         ret = -ESRCH;
2037         }
2038         break;
2039
2040         case IP_VS_SO_GET_DESTS:
2041         {
2042                 struct ip_vs_get_dests get;
2043
2044                 if (*len < sizeof(get)) {
2045                         IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2046                         ret = -EINVAL;
2047                         goto out;
2048                 }
2049                 if (copy_from_user(&get, user, sizeof(get))) {
2050                         ret = -EFAULT;
2051                         goto out;
2052                 }
2053                 if (*len != (sizeof(get) +
2054                              sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2055                         IP_VS_ERR("length: %u != %Zu\n", *len,
2056                                   sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2057                         ret = -EINVAL;
2058                         goto out;
2059                 }
2060                 ret = __ip_vs_get_dest_entries(&get, user);
2061         }
2062         break;
2063
2064         case IP_VS_SO_GET_TIMEOUTS:
2065         {
2066                 struct ip_vs_timeout_user u;
2067
2068                 if (*len < sizeof(u)) {
2069                         IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2070                         ret = -EINVAL;
2071                         goto out;
2072                 }
2073                 __ip_vs_get_timeouts(&u);
2074                 if (copy_to_user(user, &u, sizeof(u)) != 0)
2075                         ret = -EFAULT;
2076         }
2077         break;
2078
2079         case IP_VS_SO_GET_DAEMON:
2080         {
2081                 struct ip_vs_daemon_user u;
2082
2083                 if (*len < sizeof(u)) {
2084                         IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2085                         ret = -EINVAL;
2086                         goto out;
2087                 }
2088                 u.state = ip_vs_sync_state;
2089                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2090                         strncpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn, sizeof(u.mcast_master_ifn));
2091                         u.mcast_master_ifn[sizeof(u.mcast_master_ifn) - 1] = 0;
2092                 }
2093                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2094                         strncpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn, sizeof(u.mcast_backup_ifn));
2095                         u.mcast_backup_ifn[sizeof(u.mcast_backup_ifn) - 1] = 0;
2096                 }
2097                 if (copy_to_user(user, &u, sizeof(u)) != 0)
2098                         ret = -EFAULT;
2099         }
2100         break;
2101
2102         default:
2103                 ret = -EINVAL;
2104         }
2105
2106   out:
2107         up(&__ip_vs_mutex);
2108         return ret;
2109 }
2110
2111
2112 static struct nf_sockopt_ops ip_vs_sockopts = {
2113         { NULL, NULL }, PF_INET,
2114         IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
2115         IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
2116 };
2117
2118
2119 int ip_vs_control_init(void)
2120 {
2121         int ret;
2122         int idx;
2123
2124         EnterFunction(2);
2125
2126         ret = nf_register_sockopt(&ip_vs_sockopts);
2127         if (ret) {
2128                 IP_VS_ERR("cannot register sockopt.\n");
2129                 return ret;
2130         }
2131
2132         proc_net_create("ip_vs", 0, ip_vs_get_info);
2133         proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2134
2135         ipv4_vs_table.sysctl_header =
2136                 register_sysctl_table(ipv4_vs_table.root_dir, 0);
2137         /*
2138          * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
2139          * ip_vs_schedulers.
2140          */
2141         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
2142                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2143                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2144         }
2145         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
2146                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2147         }
2148
2149         memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2150         ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2151         ip_vs_new_estimator(&ip_vs_stats);
2152
2153         /* Hook the defense timer */
2154         init_timer(&defense_timer);
2155         defense_timer.function = defense_timer_handler;
2156         defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2157         add_timer(&defense_timer);
2158
2159         LeaveFunction(2);
2160         return 0;
2161 }
2162
2163 void ip_vs_control_cleanup(void)
2164 {
2165         EnterFunction(2);
2166         ip_vs_trash_cleanup();
2167         del_timer_sync(&defense_timer);
2168         ip_vs_kill_estimator(&ip_vs_stats);
2169         unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2170         proc_net_remove("ip_vs_stats");
2171         proc_net_remove("ip_vs");
2172         nf_unregister_sockopt(&ip_vs_sockopts);
2173         LeaveFunction(2);
2174 }