use ssh_host to get IP
[sysadmin-cookbook] / recepies / lxc / lxc-watchdog.sh
1 #! /bin/sh
2 ### BEGIN INIT INFO
3 # Provides:          lxc-watchdog
4 # Required-Start:    $remote_fs $named $network $time
5 # Required-Stop:     $remote_fs $named $network
6 # Required-Start:    
7 # Required-Stop:     
8 # Default-Start:     2 3 4 5
9 # Default-Stop:      0 1 6
10 # Short-Description: Manage Linux Containers startup/shutdown
11 # Description:       Uses clever inotify hack to monitor container's
12 #                    halt/reboot events watching /var/run/utmp
13 ### END INIT INFO
14
15 # Author: Dobrica Pavlinusic <dpavlin@rot13.org>
16 #
17 # based on Tony Risinger post to lxc-users mailing list
18 # http://www.mail-archive.com/lxc-users@lists.sourceforge.net/msg00074.html
19 #
20 # Install with:
21 # ln -sf /srv/sysadmin-cookbook/recepies/lxc/lxc-watchdog.sh /etc/init.d/lxc-watchdog
22 # update-rc.d lxc-watchdog defaults
23
24
# The watchdog loop relies on inotifywait; when it is not on PATH, install
# the Debian package that provides it.  `command -v` is the POSIX way to
# probe for a program (`which` is an optional external tool).
command -v inotifywait >/dev/null || apt-get install inotify-tools
26
27
# lxc_exists NAME
#
# Verify that container NAME has a config file under /var/lib/lxc.
# When it does not: print a usage line, list the known containers via
# lxc_status and terminate the whole script with status 1.
#
# Side effect: NAME is stored in the global variable $name (kept global on
# purpose, other code in this file may read it after this call).
lxc_exists() {
        name=$1

        # The path is quoted so that a name containing whitespace cannot make
        # the test itself fail (which would silently skip the check).
        if [ ! -e "/var/lib/lxc/$name/config" ] ; then
                echo "Usage: $0 name"
                lxc_status
                exit 1
        fi
}
37
38
# lxc_rootfs NAME [CONFIG]
#
# Print the value of the "lxc.rootfs" key of a container configuration.
#   NAME    container name; selects /var/lib/lxc/NAME/config
#   CONFIG  optional explicit path of the config file to read instead
#
# Only lines that start (after optional blanks) with "lxc.rootfs" followed by
# "=" are considered, so commented-out entries are ignored.  Everything after
# the first "=" is printed with surrounding blanks/tabs removed, which keeps
# values that themselves contain "=" intact.  Prints nothing when the key is
# absent.
lxc_rootfs() {
        sed -n '
                /^[[:space:]]*lxc\.rootfs[[:space:]]*=/ {
                        s/^[^=]*=[[:space:]]*//
                        s/[[:space:]]*$//
                        p
                }
        ' "${2:-/var/lib/lxc/$1/config}"
}
42
# lxc_hostname NAME
#
# Print "CONFIGURED [INSIDE]" for container NAME where
#   CONFIGURED is the value of lxc.utsname in /var/lib/lxc/NAME/config
#              (text after "=", leading blank included) and
#   INSIDE     is the content of etc/hostname inside the container rootfs.
#
# Both lookups use the argument; the function does not depend on a global
# $name having been set by a caller.  Overwrites the globals $inside and
# $config.
lxc_hostname() {
        inside=$(cat "$(lxc_rootfs "$1")/etc/hostname")
        # anchored so that commented-out lxc.utsname lines are not picked up
        config=$(grep '^[[:space:]]*lxc\.utsname[[:space:]]*=' "/var/lib/lxc/$1/config" | cut -d= -f2)
        echo "$config [$inside]"
}
48
# lxc_ip NAME
#
# Print the IPv4 address of container NAME (no prefix length), or nothing
# when none can be determined.  Sources, in priority order:
#   1. the "lxc.network.ipv4 = ADDR[/PREFIX] [BROADCAST]" line of
#      /var/lib/lxc/NAME/config
#   2. the first "address ADDR" line of etc/network/interfaces inside the
#      container rootfs
#
# Candidates from both sources are emitted in that order, normalised, blank
# results dropped and the first survivor printed - so the second source is
# really used when the first yields nothing (a plain "a | b || c" cannot do
# that because the pipeline status is the status of its last command).
# The body runs in a subshell so no global variable is touched.
lxc_ip() {
        (
                rootfs=$(lxc_rootfs "$1")
                {
                        grep '^[[:space:]]*lxc\.network\.ipv4[[:space:]]*=' "/var/lib/lxc/$1/config" \
                                | cut -d= -f2
                        # Skip the fallback when the rootfs is unknown, otherwise the
                        # host's own /etc/network/interfaces would be read.  A missing
                        # interfaces file is not an error, hence 2>/dev/null.
                        test -n "$rootfs" \
                                && grep '^[[:space:]]*address[[:space:]]' "$rootfs/etc/network/interfaces" 2>/dev/null \
                                | sed 's/^[[:space:]]*address//'
                } \
                        | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]].*$//' -e 's/\/.*$//' \
                        | grep . \
                        | head -1
        )
}
55
# lxc_status
#
# Print one aligned line per container found under /var/lib/lxc:
#   NAME STATE BOOT ROOTFS IP HOSTNAME
# where BOOT is "boot" when /var/lib/lxc/NAME/on_boot is non-empty and "-"
# otherwise.  A container is any directory /var/lib/lxc/NAME holding a
# "config" file; only that level is inspected, the container root
# filesystems are not walked.
# The loop runs in a subshell, so the globals of the caller are untouched.
lxc_status() {
        (
                for config in /var/lib/lxc/*/config ; do
                        # an unmatched glob stays literal: nothing to list then
                        test -e "$config" || continue
                        name=${config%/config}
                        name=${name##*/}

                        status=$(lxc-info -n "$name" 2>/dev/null | grep state: | cut -d: -f2)     # 0.7.5
                        test -z "$status" && status=$(lxc-info -n "$name" | sed -e 's/^.* is //') # 0.7.2
                        boot="-"
                        test -s "/var/lib/lxc/$name/on_boot" && boot="boot"
                        echo "$name $status $boot $(lxc_rootfs "$name") $(lxc_ip "$name") $(lxc_hostname "$name")"
                done
        ) | column -t
}
65
66
# cleanup_init_scripts NAME
#
# Prepare the root filesystem of container NAME for running under LXC:
#   * remove the rc?.d links of init scripts matching checkroot, umountfs,
#     umountroot, hwclock, udev and checkfs (each removal is reported by
#     rm -v)
#   * write NAME into etc/hostname
#   * append "IP NAME" to etc/hosts unless a line containing NAME is there
#
# Returns 1 without touching anything when the rootfs is unknown or is not a
# directory: with an empty rootfs every path below would resolve to the
# host's own /etc.
# Overwrites the globals $rootfs and $script.
cleanup_init_scripts() {
        rootfs=$(lxc_rootfs "$1")

        if [ -z "$rootfs" ] || [ ! -d "$rootfs" ] ; then
                echo "ERROR $1 rootfs $rootfs not found" >&2
                return 1
        fi

        for script in \
                "$rootfs"/etc/rc?.d/*checkroot* \
                "$rootfs"/etc/rc?.d/*umountfs \
                "$rootfs"/etc/rc?.d/*umountroot \
                "$rootfs"/etc/rc?.d/*hwclock* \
                "$rootfs"/etc/rc?.d/*udev* \
                "$rootfs"/etc/rc?.d/*checkfs*
        do
                # Unmatched patterns stay literal and are skipped; -L keeps
                # dangling symlinks (target missing) eligible for removal.
                if [ -e "$script" ] || [ -L "$script" ] ; then
                        rm -v -- "$script"
                fi
        done

        echo "$1" > "$rootfs/etc/hostname"
        grep -F -- "$1" "$rootfs/etc/hosts" || echo "$(lxc_ip "$1") $1" >> "$rootfs/etc/hosts"
}
82
83
# setup_inittab NAME REMOVE ADD
#
# Make sure etc/inittab inside the rootfs of container NAME contains the
# line ADD.  When ADD is not present yet, every line matching the regular
# expression REMOVE is dropped, ADD is appended and a notice is printed.
# Nothing happens when the container has no etc/inittab.
#
#   NAME    container name
#   REMOVE  grep regular expression selecting the lines to drop
#   ADD     literal inittab line to add (compared as a fixed string and
#           written verbatim, so the presence check stays reliable)
#
# Returns 1 when the rootfs is unknown, because the path would otherwise
# point at the host's own /etc/inittab.
# Overwrites the globals $rootfs, $remove, $add and $inittab.
setup_inittab() {
        rootfs=$(lxc_rootfs "$1")
        remove=$2
        add=$3

        if [ -z "$rootfs" ] ; then
                echo "ERROR $1 rootfs not found, inittab not modified" >&2
                return 1
        fi

        # let container respond to kill -SIGPWR
        inittab=$rootfs/etc/inittab
        if test -e "$inittab" && ! grep -F -q -- "$add" "$inittab" ; then
                grep -v -- "$remove" "$inittab" > "$inittab.new"
                printf '%s\n' "$add" >> "$inittab.new"
                mv "$inittab.new" "$inittab"
                echo "$inittab modified with $add"
        fi
}
98
99
# lxc_log MESSAGE...
#
# Print MESSAGE on stdout prefixed with the current local time formatted as
# YYYY-MM-DDTHH:MM:SS.  Several arguments are joined with single spaces.
# The message is printed verbatim: no pathname expansion, no whitespace
# squeezing and no backslash interpretation is applied to it.
lxc_log() {
        printf '%s %s\n' "$(date +%Y-%m-%dT%H:%M:%S)" "$*"
}
103
104
# lxc_kill NAME SIGNAL
#
# Send SIGNAL (in kill(1) option form, e.g. -SIGPWR) to the init process of
# container NAME.
#
# The init pid is taken from "lxc-ps -C init -o pid"; only rows whose first
# column equals NAME exactly are used, so a container whose name merely
# starts with NAME is never signalled.  From lxc 0.7.5 on, lxc-ps is given
# "--" in front of the ps options.
#
# When no init pid is found the container state is shown with lxc-info and
# the whole script exits with status 1.
# Overwrites the globals $name, $sig, $ver, $opts and $init_pid.
lxc_kill() {
        name=$1
        sig=$2

        # "lxc version: 0.7.5" -> 75 ; empty when lxc-version is unavailable
        ver=$(lxc-version 2>/dev/null | cut -d: -f2 | sed -e 's/[^0-9]//g' -e 's/^0*//')
        opts=''
        test "${ver:-0}" -ge 75 && opts='--'

        # $opts is deliberately unquoted: it must vanish when empty
        init_pid=$(lxc-ps $opts -C init -o pid | awk -v n="$name" '$1 == n { print $2 }')
        if [ -z "$init_pid" ] ; then
                lxc-info -n "$name"
                exit 1
        fi
        lxc_log "$name kill $sig $init_pid"
        # $init_pid is deliberately unquoted: it may hold more than one pid
        /bin/kill "$sig" $init_pid
}
121
# lxc_stop [NAME]
#
# Shut container NAME down cleanly: log the request, send SIGPWR to its init
# through lxc_kill, block in lxc-wait until the container reports STOPPED,
# then log completion.
#
# NAME is taken from the first argument; when it is omitted the current
# value of the global $name is used.  The global $name is set to the
# container acted upon.
lxc_stop() {
        name=${1:-$name}

        lxc_log "$name stop"
        lxc_kill "$name" -SIGPWR
        lxc-wait -n "$name" -s STOPPED
        lxc_log "$name stoped"
#       rm -f /var/lib/lxc/${name}/on_boot
}
129
130
# lxc_start NAME
#
# Start container NAME unless lxc-info already reports it RUNNING:
#   * remount the filesystem holding the rootfs read-write
#   * lxc-start it detached, logging to /tmp/NAME.log
#   * wait for the RUNNING state and show lxc-info
#   * create /var/lib/lxc/NAME/on_boot (containing NAME) when no such file
#     exists yet
#
# Prints an ERROR line and returns 1 when the rootfs is unknown or missing.
# The emptiness check is explicit because an unquoted empty value turns
# "[ ! -e ]" into a test that is always false.
# Overwrites the globals $name, $rootfs and $dev.
lxc_start() {
        name=$1
        rootfs=$(lxc_rootfs "$1")

        if [ -z "$rootfs" ] || [ ! -e "$rootfs" ] ; then
                echo "ERROR $name rootfs $rootfs not found"
                return 1
        fi

        if ! lxc-info -n "$name" | grep RUNNING ; then
                lxc_log "$name start"
                dev=$(df -P "$rootfs" | tail -1 | cut -d" " -f1)
                mount "$dev" -o remount,rw # fix debian upgrade which remounts dir ro
                lxc-start -n "$name" -o "/tmp/${name}.log" -d
                lxc-wait  -n "$name" -s RUNNING
                lxc-info  -n "$name"
                test -f "/var/lib/lxc/${name}/on_boot" || echo "$name" > "/var/lib/lxc/${name}/on_boot"
        fi
}
150
# lxc_watchdog NAME
#
# Supervise container NAME until it has halted.  The function changes into
# the container's var/run directory and then loops:
#   * count the tasks in the container's cgroup
#   * when only init is left (plus one task per running sulogin), read the
#     runlevel recorded in the container's utmp and act on it:
#       N*           fresh boot, nothing to do
#       ??0|unknown  halt:   lxc-stop, wait for STOPPED, leave the loop
#       ??6          reboot: lxc-stop, wait for STOPPED, lxc-start again
#       other        leave the loop unless lxc-info says RUNNING
#   * otherwise log the task and sulogin counts
#   * sleep in inotifywait until utmp changes, at most 300 seconds
#
# Returns 1 when the run directory cannot be entered: from any other
# directory "runlevel utmp" cannot see the container's utmp and its
# "unknown" answer would be taken for a halt.  The message goes to stdout
# like every other watchdog message.
# Exits the script with status 1 when the task count cannot be read.
# Overwrites several globals ($name, $cgroup, $rootfs, $run, $tasks, ...).
lxc_watchdog() {
        name=$1
        # Use the first cgroup mount point only; several cgroup hierarchies
        # may be mounted and a multi-line value is useless as a path.
        cgroup=$(mount -t cgroup | awk 'NR == 1 { print $3 }')
        test -d "$cgroup/lxc/$name" && cgroup="$cgroup/lxc"
        rootfs=$(lxc_rootfs "$name")
        run=$rootfs/var/run
        test -L "$run" && run=$rootfs/$(readlink "$run") # recent Debian have symlink to /run
        if ! cd "$run" ; then
                echo "can't cd watchdog into $run"
                return 1
        fi

        while true; do
                tasks=$(wc -l < "$cgroup/$name/tasks")
                stop_on=1 # init
                sulogins=$(lxc-ps --name "$name" | grep -c sulogin)
                if [ "$sulogins" -gt 0 ] ; then
                        stop_on=$((stop_on + sulogins))
                fi

                test -z "$tasks" && exit 1
                if [ "$tasks" -eq "$stop_on" ]; then

                        runlevel="$(runlevel utmp)"
                        lxc_log "$name runlevel $runlevel"

                        case $runlevel in
                        N*)
                                # nothing for new boot state
                        ;;
                        ??0|unknown)
                                lxc_log "$name halt"
                                lxc-stop -n "$name"
                                lxc-wait -n "$name" -s STOPPED
                                break
                        ;;
                        ??6)
                                lxc_log "$name reboot";
                                lxc-stop -n "$name"
                                lxc-wait -n "$name" -s STOPPED
                                lxc-start -d -n "$name" -o "/tmp/${name}.log"
                        ;;
                        *)
                                # make sure vps is still running
                                state="$(lxc-info -n "$name" | sed -e 's/.* is //')"
                                [ "$state" = "RUNNING" ] || break
                        ;;
                        esac
                else
                        lxc_log "$name $tasks tasks $sulogins console"
                fi

                # time of 5 minutes on it JUST IN CASE...
                inotifywait -qqt 300 utmp
        done

        lxc_log "$name watchdog exited"
}
207
208
# usage
#
# Write the command synopsis to stderr and terminate the script with exit
# status 3.
usage() {
        printf '%s\n' "Usage: $0 {start|stop|restart|status|boot|disable} [name name ... ]" >&2
        exit 3
}
213
# command_on_lxc COMMAND NAME
#
# Run one COMMAND against the single container NAME after printing a
# "# COMMAND NAME" header line.
#
#   start      check the container exists, prepare its rootfs (init script
#              cleanup, inittab entries for getty/powerfail/ctrlaltdel),
#              start it and, 5 seconds later, launch "$0 watchdog NAME" in
#              the background with its output appended to /tmp/NAME.log
#   stop|halt  check the container exists and shut it down
#   reload|force-reload|restart|reboot
#              send SIGINT to the container's init
#   watchdog   run the watchdog loop in the foreground
#   boot       mark the container for start at boot (on_boot holds NAME)
#   disable    unmark it (on_boot is truncated to an empty file)
#   other      print usage and exit
#
# Overwrites the global $command.
command_on_lxc() {
        command=$1
        shift

        echo "# $command $1"

        case "$command" in

        start)
                lxc_exists "$1"
                cleanup_init_scripts "$1"
                setup_inittab "$1" ":respawn:/sbin/getty.*tty1"   "c1:12345:respawn:/sbin/getty 38400 tty1 linux"
                setup_inittab "$1" "::power"                      "p0::powerfail:/sbin/init 0"
                setup_inittab "$1" "::ctrlaltdel"                 "p6::ctrlaltdel:/sbin/init 6"
                lxc_start "$1"
                # give container 5 seconds to start more than one process
                ( sleep 5 ; nohup "$0" watchdog "$1" >> "/tmp/$1.log" 2>/dev/null ) &
                ;;
        stop|halt)
                lxc_exists "$1"
                lxc_stop "$1"
                ;;
        reload|force-reload|restart|reboot)
                lxc_kill "$1" -SIGINT
                ;;
        watchdog)
                lxc_watchdog "$1"
                ;;
        boot)
                echo "$1" > "/var/lib/lxc/$1/on_boot"
                ;;
        disable)
                # ":" writes nothing at all; "echo -n" is not portable under
                # /bin/sh and may leave a literal "-n" in the file, which
                # would make it non-empty.
                : > "/var/lib/lxc/$1/on_boot"
                ;;
        *)
                usage
                ;;

        esac
}
255
# Entry point: COMMAND [NAME ...]
#
#   no COMMAND       print usage and exit
#   status           print the container table and exit
#   COMMAND NAME...  apply COMMAND to every named container in turn
#   COMMAND          apply COMMAND to every container that has an on_boot
#                    file; for "start" only those whose on_boot file is
#                    non-empty, the others are reported as skipped
command=$1
test -z "$command" && usage
test "$command" = "status" && lxc_status && exit
shift

if [ -z "$1" ] ; then
        # Iterate over the glob directly instead of piping "ls" into
        # "while read": the commands run for each container then keep the
        # script's own stdin rather than inheriting the list of names.
        for path in /var/lib/lxc/*/on_boot ; do
                # an unmatched glob stays literal: no container to act on
                test -e "$path" || continue
                name=${path%/on_boot}
                name=${name##*/}
                if [ "$command" != "start" ] || [ -s "$path" ] ; then
                        command_on_lxc "$command" "$name"
                else
                        echo "# skip $command $name"
                fi
        done
else
        while [ -n "$1" ] ; do
                command_on_lxc "$command" "$1"
                shift
        done
fi
276