3 #Postgres data directory
4 postgres_datadir='/var/lib/pgsql/9.6/data'
5 #Postgres configuration directory
6 postgres_configdir='/var/lib/pgsql/9.6/data'
8 postgres_user_key='/var/lib/pgsql/.ssh/id_rsa'
9 #Pgpool configuration directory
10 pgpool_configdir='/etc/pgpool-II'
12 # used by pcp_* utils to find password
13 export PCPPASSFILE=/etc/pgpool-II/.pcppass
16 if [ -f '/tmp/postgres_master' ]
18 #Get current postgres master id
19 current_master_id=$(cat /tmp/postgres_master);
21 echo "[ERROR] /tmp/postgres_master not found !";
25 #Get postgres master name
26 current_master_name=$(pcp_node_info --host localhost --port 9898 --username postgres --no-password --node-id $current_master_id | cut -d' ' -f1)
27 #Get postgres slave id
28 [ $current_master_id == 0 ] && current_slave_id=1 || current_slave_id=0
29 #Get postgres slave name
30 current_slave_name=$(pcp_node_info --host localhost --port 9898 --username postgres --no-password --node-id $current_slave_id | cut -d' ' -f1)
32 #Test if pgpool is running
33 CheckIfPgpoolIsRunning () {
34 #Send signal 0 to pgpool to check if it's running
35 if ! systemctl -q is-active pgpool; then echo "[ERROR] Pgpool is not running !"; exit 1; fi;
38 AttachNodeToPgpool () {
39 #pcp_attach_node is a command that permit to attach a specific postgres server (identified by 6th parameter) to pgpool.
40 #pcp_attach_node dont return a good error code when it fails so here if I catch "BackendError" message in stderr I presume
41 #that attachment failed.
42 #TODO:find a condition to break the folowing loop if attachment fails.
43 while [ "`pcp_attach_node --host localhost --port 9898 --user postgres --no-password --node-id $1`" == "BackendError" ]
45 pcp_attach_node --host localhost --port 9898 --user postgres --no-password --node-id $1;
46 #This sleep is recommanded to avoid stressing pgpool in this infinite loop.
51 #Whether the slave node is down, start it and attach it to pgpool's backend pool.
52 ReattachDegeneratedSlave () {
54 echo "[INFO] Slave node '$current_slave_name' is down. Performing postgres server reboot..."
55 #Remote postgres reboot via ssh
56 ssh -i $postgres_user_key postgres@$current_slave_name "sudo systemctl restart postgresql-9.6"
57 #Test if postgres is running
58 status=$(systemctl --host $current_slave_name is-active postgresql-9.6)
59 if [ $status != "active" ]
61 echo "[ERROR] Postgres slave still down !";
64 echo "[OK] Slave node successfully started.";
67 #Do 'slave online recovery' to force slave sync if it has incoherent data relatevely to master.
68 #echo "[INFO] Starting online recovery for slave '$current_slave_name' ..."
69 #ssh -i /var/lib/pgsql/.ssh/id_rsa postgres@$current_slave_name "bash /var/lib/pgsql/streaming-replication.sh $current_master_name"
70 #Atttach slave (even master) to pgpool's backends pool
71 #Reattach the master node if you have performed an online recovery for slave node and not juste a simple reboot.
72 #Attempting to reatach master to pgpool's backend pool
73 #echo "[INFO] Attaching master node '$current_master_name' ..."
74 #AttachNodeToPgpool "$current_master_id"
75 #echo "[OK] Master node '$current_master_name' has been successfully reattached to pgpool."
76 #Attempting to reattach slave to pgpool's backend pool
77 echo "[INFO] Attaching slave node '$current_slave_name'..."
78 AttachNodeToPgpool "$current_slave_id"
79 echo "[OK] Slave node '$current_slave_name' has been successfully reattached to pgpool."
83 #Whether the master is down do the folowing operations :
84 SwitchOldMasterToSlave () {
86 new_master_name=$current_slave_name
87 new_master_id=$current_slave_id
88 new_slave_name=$current_master_name
89 new_slave_id=$current_master_id
90 #Setup old master config to slave mode
91 echo "[INFO] Setting up configuration for the new slave node '$new_slave_name'..."
92 ssh -i $postgres_user_key postgres@$new_slave_name "sudo systemctl stop postgresql-9.6"
93 ssh -i $postgres_user_key postgres@$new_slave_name "ln -sf $postgres_configdir/postgresql.conf.slave $postgres_configdir/postgresql.conf"
94 ssh -i $postgres_user_key postgres@$new_slave_name "[ -f $postgres_datadir/recovery.done ] && mv $postgres_datadir/recovery.done $postgres_datadir/recovery.conf" || true;
95 # Switch slave to new master
96 echo "[INFO] Setting up configuration for the new master '$new_master_name'..."
97 ssh -i $postgres_user_key postgres@$new_master_name "[ -f /tmp/trigger_file ] && rm /tmp/trigger_file" || true;
98 ssh -i $postgres_user_key postgres@$new_master_name "[ -f $postgres_datadir/recovery.conf ] && mv $postgres_datadir/recovery.conf $postgres_datadir/recovery.done" || true
99 ssh -i $postgres_user_key postgres@$new_master_name "ln -sf $postgres_configdir/postgresql.conf.master $postgres_configdir/postgresql.conf"
100 echo "[INFO] Restarting new master..."
101 ssh -i $postgres_user_key postgres@$new_master_name "sudo systemctl restart postgresql-9.6"
102 status=$(systemctl --host $new_master_name is-active postgresql-9.6)
103 if [ $status != "active" ]
105 echo "[ERROR] New postgres master not running !";
108 echo "[OK] New master started.";
110 # Start new slave/master with online recovery
111 echo "[INFO] Performing online slave recovery..."
112 ssh -i $postgres_user_key postgres@$new_slave_name "bash /var/lib/pgsql/streaming-replication.sh $new_master_name"
113 echo "[OK] Online recovery completed."
115 #Write changes to pgpool.conf file to keep the same current master and slave nodes even after pgpool reboot.
116 sed -i "s/^backend_hostname0.*/backend_hostname0='$new_master_name'/" $pgpool_configdir/pgpool.conf
117 sed -i "s/^backend_hostname1.*/backend_hostname1='$new_slave_name'/" $pgpool_configdir/pgpool.conf
118 echo "[OK] Pgpool configuration file updated."
120 #Attach new master to pgpool
121 echo "[INFO] Attaching new master node '$new_master_name'..."
122 AttachNodeToPgpool "$new_master_id"
123 echo "[OK] New master node '$new_master_name' has been successfully reattached to pgpool."
125 #Attach new slave to pgpool
126 echo "[INFO] Attaching new slave node '$new_slave_name'..."
127 AttachNodeToPgpool "$new_slave_id"
128 echo "[OK] New slave node '$new_slave_name' has been successfully reattached to pgpool."
132 CheckIfPgpoolIsRunning
134 #Get master/slave state
135 current_master_state=$(pcp_node_info --host localhost --port 9898 --username postgres --no-password --node-id $current_master_id | cut -d' ' -f3)
136 current_slave_state=$(pcp_node_info --host localhost --port 9898 --username postgres --no-password --node-id $current_slave_id | cut -d' ' -f3)
138 # state 1 => postgres server is attached but still not receiving connections
139 # state 2 => postgres server is attached and managing clients connections
140 # state 3 => postgres server is detached and probably is down.
142 #If slave is down and master is up then perform an online slave backup.
143 [ $current_slave_state == 3 ] && ([ $current_master_state == 1 ] || [ $current_master_state == 2 ]) && ReattachDegeneratedSlave
144 #If master is down then switch roles between failed master(new server) and the slave(new master).
145 [ $current_master_state == 3 ] && SwitchOldMasterToSlave