don't try to start containers without rootfs mounted
[sysadmin-cookbook] / recepies / lxc / lxc-watchdog.sh
index 04a0d98..a68d33b 100755 (executable)
-#!/bin/bash -x
+#! /bin/sh
+### BEGIN INIT INFO
+# Provides:          lxc-watchdog
+# Required-Start:    $remote_fs $named $network $time
+# Required-Stop:     $remote_fs $named $network
+# Required-Start:    
+# Required-Stop:     
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Manage Linux Containers startup/shutdown
+# Description:       Uses a clever inotify hack to monitor a container's
+#                    halt/reboot events by watching /var/run/utmp
+### END INIT INFO
 
-# based on Tony Risinger code from lxc-users
+# Author: Dobrica Pavlinusic <dpavlin@rot13.org>
+#
+# based on Tony Risinger post to lxc-users mailing list
 # http://www.mail-archive.com/lxc-users@lists.sourceforge.net/msg00074.html
+#
+# Install with:
+# ln -sf /srv/sysadmin-cookbook/recepies/lxc/lxc-watchdog.sh /etc/init.d/lxc-watchdog
+# update-rc.d lxc-watchdog defaults
 
-which inotifywait || apt-get install inotify-tools
 
-name=llin
+# inotifywait (package inotify-tools) is used by the watchdog loop.
+# command -v is the POSIX way to probe for a program; which is not standardized.
+# NOTE(review): apt-get install may ask for confirmation - confirm this is
+# acceptable when the script runs non-interactively at boot.
+command -v inotifywait >/dev/null 2>&1 || apt-get install inotify-tools
 
-#lxc-info -n $name | grep RUNNING && exit
 
-rootfs=`grep lxc.rootfs /var/lib/lxc/$name/config | cut -d= -f2`
-echo "$name rootfs $rootfs"
+# Make sure container $1 has a config file under /var/lib/lxc.
+# Otherwise print a usage hint followed by the status table and terminate
+# the script with status 1. Sets the global $name to $1 as a side effect.
+lxc_exists() {
+       name=$1
 
-# fix lxc-stop which remounts ro
-mount /mnt/llin -o remount,rw
+       # config present: the container is known, nothing more to do
+       [ ! -e /var/lib/lxc/$name/config ] || return 0
+
+       echo "Usage: $0 name"
+       lxc_status
+       exit 1
+}
+
+
+# Print the lxc.rootfs value from /var/lib/lxc/$1/config.
+# Prints nothing when the config has no lxc.rootfs line.
+lxc_rootfs() {
+       # keep everything after the FIRST '=' (the path itself may contain '='),
+       # then drop the blanks surrounding the value
+       sed -n '/^ *lxc\.rootfs *=/{s/^[^=]*= *//;s/ *$//;p;}' "/var/lib/lxc/$1/config"
+}
+
+
+# Print a column-aligned table with one row per directory under /var/lib/lxc
+# that holds a "config" file:
+#   name  state  boot-flag  rootfs  ip  hostname
+# boot-flag is "boot" when the container's on_boot file is non-empty, else "-".
+lxc_status() {
+       # lxc-info output is read as "<name> is <state>" once the quotes are stripped
+       ( find /var/lib/lxc/ -name "config" | cut -d/ -f5 | sort -u | xargs -I {} lxc-info -n {} | sed "s/'//g" | while read -r name is status ; do
+               # resolve the rootfs once per container instead of once per use
+               rootfs=$(lxc_rootfs "$name")
+               boot="-"
+               hostname=$(cat "$rootfs/etc/hostname")
+               ip=$(grep address "$rootfs/etc/network/interfaces" | sed 's/.*address //')
+               test -s "/var/lib/lxc/$name/on_boot" && boot="boot"
+               echo "$name $status $boot $rootfs $ip $hostname"
+       done ) | column -t
+}
+
+
+# Prepare the rootfs of container $1: remove the rc?.d entries matching
+# *umountfs, *umountroot, *hwclock* and *udev*, then write the container name
+# into its /etc/hostname.
+# Returns 1 without touching anything when the rootfs cannot be resolved.
+cleanup_init_scripts() {
+       rootfs=$(lxc_rootfs "$1")
+
+       # with an empty $rootfs every path below would point at the HOST's /etc
+       if [ -z "$rootfs" ] || [ ! -d "$rootfs/etc" ] ; then
+               echo "ERROR $1 rootfs '$rootfs' not usable, init scripts left untouched" >&2
+               return 1
+       fi
+
+       for script in \
+               "$rootfs"/etc/rc?.d/*umountfs \
+               "$rootfs"/etc/rc?.d/*umountroot \
+               "$rootfs"/etc/rc?.d/*hwclock* \
+               "$rootfs"/etc/rc?.d/*udev* \
+       ; do
+               # an unmatched glob stays literal; -L also catches dangling symlinks
+               if [ -e "$script" ] || [ -L "$script" ] ; then
+                       rm -v "$script"
+               fi
+       done
+
+       echo "$1" > "$rootfs/etc/hostname"
+}
+
+
+# Swap an entry in the /etc/inittab of container $1.
+#   $2 - grep pattern selecting the line(s) to drop
+#   $3 - literal line to append
+# Does nothing when the inittab is missing or already contains $3.
+# Returns 1 without touching anything when the rootfs cannot be resolved.
+setup_inittab() {
+       rootfs=$(lxc_rootfs "$1")
+       remove=$2
+       add=$3
+
+       # an empty $rootfs would turn $inittab into the HOST's /etc/inittab
+       if [ -z "$rootfs" ] ; then
+               echo "ERROR $1 rootfs not configured, inittab left untouched" >&2
+               return 1
+       fi
+
+       # let container respond to kill -SIGPWR
+       inittab=$rootfs/etc/inittab
+       # -F: $add is a literal inittab line, not a regular expression
+       if test -e "$inittab" && ! grep -qF -- "$add" "$inittab" ; then
+               grep -v -- "$remove" "$inittab" > "$inittab.new"
+               echo "$add" >> "$inittab.new"
+               mv "$inittab.new" "$inittab"
+               echo "$inittab modified with $add"
+       fi
+}
+
+
+# Print all arguments as one log line prefixed with a
+# YYYY-MM-DDTHH:MM:SS timestamp taken from date.
+lxc_log() {
+       # "$*" keeps the message intact: no word splitting, no glob expansion
+       printf '%s %s\n' "$(date +%Y-%m-%dT%H:%M:%S)" "$*"
+}
+
+
+# Send signal $2 (given with its leading dash, e.g. -SIGPWR) to the init
+# process of container $1.
+# When no init pid is found, prints the lxc-info output for the container and
+# terminates the script with status 1.
+lxc_kill() {
+       name=$1
+       sig=$2
+
+       # compare the whole container column: the former prefix match "^$name"
+       # also selected containers whose name merely starts with $name
+       init_pid=$(lxc-ps -C init -o pid | awk -v n="$name" '$1 == n { print $2 }')
+       if [ -z "$init_pid" ] ; then
+               lxc-info -n "$name"
+               exit 1
+       fi
+       lxc_log "$name kill $sig $init_pid"
+       # $init_pid stays unquoted so that several pids become separate arguments
+       /bin/kill "$sig" $init_pid
+}
+
+# Ask container $1 to shut down by sending SIGPWR to its init, then block
+# until lxc-wait reports the STOPPED state.
+lxc_stop() {
+       # use the argument; fall back to an already-set global $name so that
+       # callers relying on the old behaviour keep working
+       name=${1:-$name}
+
+       lxc_log "$name stop"
+       lxc_kill "$name" -SIGPWR
+       lxc-wait -n "$name" -s STOPPED
+       lxc_log "$name stoped"
+#      rm -f /var/lib/lxc/${name}/on_boot
+}
+
+
+# Start container $1 in the background unless lxc-info already reports it as
+# RUNNING, wait for the RUNNING state and create its on_boot marker if the
+# file does not exist yet.
+# Returns 1 without starting anything when the rootfs is unset or missing.
+lxc_start() {
+       name=$1
+       rootfs=$(lxc_rootfs "$1")
 
-lxc-start -d -n $name -o /tmp/${name}.log
+       # quoted on purpose: with an unquoted empty $rootfs the test collapses
+       # to [ ! -e ], which is false, and the start would go ahead
+       if [ -z "$rootfs" ] || [ ! -e "$rootfs" ] ; then
+               echo "ERROR $name rootfs $rootfs not found"
+               return 1
+       fi
+
+       if ! lxc-info -n "$name" | grep RUNNING ; then
+               lxc_log "$name start"
+               lxc-start -n "$name" -o "/tmp/${name}.log" -d
+               lxc-wait  -n "$name" -s RUNNING
+               lxc-info  -n "$name"
+               test -f "/var/lib/lxc/${name}/on_boot" || echo "$name" > "/var/lib/lxc/${name}/on_boot"
+       fi
+}
+
+lxc_watchdog() {
+name=$1
+rootfs=$(lxc_rootfs $1)
 
 while true; do
-       # time of 5 minutes on it JUST IN CASE...
        vps_utmp=${rootfs}/var/run/utmp
-       inotifywait -qqt 300 ${vps_utmp}
-       if [ $(wc -l < /cgroup/${name}/tasks) -eq 1 ]; then
+       tasks=`wc -l < /cgroup/${name}/tasks`
+       test -z "$tasks" && exit 1
+       if [ "$tasks" -eq 1 ]; then
 
                runlevel="$(runlevel ${vps_utmp})"
-               echo "# $name runlevel $runlevel"
+               lxc_log "$name runlevel $runlevel"
 
                case $runlevel in
                N*)
                        # nothing for new boot state
                ;;
                ??0)
-                       echo "$name halt"
+                       lxc_log "$name halt"
                        lxc-stop -n "${name}"
+                       lxc-wait -n ${name} -s STOPPED
                        break
                ;;
                ??6)
-                       echo "$name reboot";
+                       lxc_log "$name reboot";
                        lxc-stop -n ${name}
                        lxc-wait -n ${name} -s STOPPED
-                       mount /mnt/llin -o remount,rw
                        lxc-start -d -n ${name} -o /tmp/${name}.log
                ;;
                *)
@@ -48,6 +162,84 @@ while true; do
                        [ "$state" = "RUNNING" ] || break
                ;;
                esac
+       else
+               lxc_log "$name $tasks tasks"
        fi
+
+       # timeout of 5 minutes on it JUST IN CASE...
+       inotifywait -qqt 300 ${vps_utmp}
 done
 
+lxc_log "$name watchdog exited"
+
+}
+
+
+# Write the command synopsis to stderr and terminate the script with status 3.
+usage() {
+       printf '%s\n' "Usage: $0 {start|stop|restart|status|boot|disable} [name name ... ]" >&2
+       exit 3
+}
+
+# Run one sub-command for a single container.
+#   $1 - start | stop|halt | reload|force-reload|restart|reboot | watchdog |
+#        boot | disable
+#   $2 - container name
+# Any other sub-command prints the usage text and exits the script.
+command_on_lxc() {
+command=$1
+shift
+
+echo "# $command $1"
+
+case "$command" in
+
+start)
+       lxc_exists "$1"
+       cleanup_init_scripts "$1"
+       setup_inittab "$1" ":respawn:/sbin/getty.*tty1"   "c1:12345:respawn:/sbin/getty 38400 tty1 linux"
+       setup_inittab "$1" "::power"                      "p0::powerfail:/sbin/init 0"
+       setup_inittab "$1" "::ctrlaltdel"                 "p6::ctrlaltdel:/sbin/init 6"
+       lxc_start "$1"
+       # give container 5 seconds to start more than one process
+       # (the watchdog's stdout is appended to the container log, its stderr is discarded)
+       ( sleep 5 ; nohup "$0" watchdog "$1" >> "/tmp/$1.log" 2>/dev/null ) &
+       ;;
+stop|halt)
+       lxc_exists "$1"
+       lxc_stop "$1"
+       ;;
+reload|force-reload|restart|reboot)
+       lxc_kill "$1" -SIGINT
+       ;;
+watchdog)
+       lxc_watchdog "$1"
+       ;;
+boot)
+       lxc_exists "$1"
+       echo "$1" > "/var/lib/lxc/$1/on_boot"
+       ;;
+disable)
+       lxc_exists "$1"
+       # truncate with ':' - "echo -n" is not portable under /bin/sh and may
+       # write a literal "-n", leaving a non-empty file that still means "boot"
+       : > "/var/lib/lxc/$1/on_boot"
+       ;;
+*)
+       usage
+       ;;
+
+esac
+
+}
+
+# Entry point: $1 is the sub-command, any further arguments are container
+# names. Without names the sub-command is applied to every container that has
+# an on_boot file ("start" only to those whose on_boot file is non-empty).
+command=$1
+test -z "$command" && usage
+# leave after "status" even when lxc_status itself reports a failure
+if [ "$command" = "status" ] ; then
+       lxc_status
+       exit
+fi
+shift
+
+rc=0
+if [ -z "$1" ] ; then
+       for path in /var/lib/lxc/*/on_boot ; do
+               # an unmatched glob stays literal
+               [ -e "$path" ] || continue
+               # /var/lib/lxc/<name>/on_boot -> <name>
+               name=${path%/on_boot}
+               name=${name##*/}
+               if [ "$command" != "start" ] || [ -s "$path" ] ; then
+                       # subshell: an exit inside a helper (e.g. container not
+                       # running) must not skip the remaining containers
+                       ( command_on_lxc "$command" "$name" ) || rc=$?
+                       # 3 is the exit status of usage: the sub-command is invalid
+                       # for every container, so stop right away
+                       [ "$rc" -eq 3 ] && exit 3
+               else
+                       echo "# skip $command $name"
+               fi
+       done
+else
+       while [ -n "$1" ] ; do
+               ( command_on_lxc "$command" "$1" ) || rc=$?
+               [ "$rc" -eq 3 ] && exit 3
+               shift
+       done
+fi
+exit $rc
+