前言

之前搭建好了Galera Cluster,实现了数据同步,下一步就是保证三台主机的高可用了。

安装Keepalived

1
# Install the keepalived package with yum.
yum install keepalived

禁用防火墙

1
2
# Stop the running firewalld service now.
systemctl stop firewalld 
# Keep firewalld from starting again at boot.
systemctl disable firewalld

关闭SELinux

1
2
3
4
# Temporarily turn off SELinux enforcement; this is lost after a reboot.
setenforce 0
# Edit /etc/selinux/config and set the following line.
SELINUX=disabled

配置/etc/keepalived/keepalived.conf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
show status like 'wsrep%';

# keepalived配置
! Configuration File for keepalived

# Global settings for this keepalived node.
global_defs {
router_id mysql-01 # name identifying this host
enable_script_security
}
# MySQL health-check script definition; VI_1 references it in track_script.
vrrp_script chk_mysql {
script "/etc/keepalived/check_mysql.sh"
interval 5 # interval between script invocations
user root
}
# VRRP instance that carries the virtual IP 192.168.6.188 and tracks chk_mysql.
vrrp_instance VI_1 {
state MASTER
interface ens35 # network interface name, as listed by ifconfig
virtual_router_id 36 # must be unique within the network
priority 100 # set 100, 90 and 80 on the three machines respectively
advert_int 1
authentication {
auth_type PASS
auth_pass mypass
}
track_script {
chk_mysql
}
virtual_ipaddress {
192.168.6.188
}
}

检查脚本check_mysql.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash

### Exit this run if a previous invocation of the check script has not finished yet.
# Counts the `ps -ef` lines that contain this script's path ($0), ignoring the
# grep processes themselves.
# NOTE(review): the threshold of 2 presumably allows for this process plus the
# command-substitution subshell - confirm on the target host.
if [ `ps -ef|grep -w "$0"|grep -v "grep"|wc -l` -gt 2 ];then
exit 0
fi
# Command prefix for invoking the mysql client.
# NOTE(review): the password is hard-coded and passed as a command-line
# argument; consider a client option file instead.
mysql_con='mysql -uroot -pYour@Password'
# File that receives the error output and log lines written by this script.
# NOTE(review): the logs directory is not created here; presumably it must
# already exist.
error_log="/etc/keepalived/logs/check_mysql.err"

#######################################
# Probe whether MySQL can serve a trivial query.
# Globals:   mysql_con (read) - mysql client command prefix
#            error_log (read) - file the client's stderr is appended to
# Outputs:   the query result on stdout
# Returns:   exit status of the mysql client
#######################################
excute_query() {
  local probe_sql="select 1;"
  # mysql_con is left unquoted on purpose: it holds the command plus its options.
  ${mysql_con} -e "${probe_sql}" 2>> ${error_log}
}

#######################################
# Handler for "query failed and the mysqld service is not healthy":
# log the event, stop keepalived, then send a mail alert.
# Globals:   error_log (read)     - file all log lines and errors are appended to
#            ALERT_MAIL_TO (read) - optional alert recipient; defaults to root
# Arguments: none
#######################################
function service_error {
  local now
  now=$(date "+%F %H:%M:%S")
  echo -e "${now} -----mysql service error,now stop keepalived-----" >> "${error_log}"
  service keepalived stop &>> "${error_log}"
  # mail needs at least one recipient to send anything; the original call
  # passed none, so the alert was never delivered.
  echo "DB1 keepalived 已停止" \
    | mail -s "DB1 keepalived 已停止,请及时处理!" "${ALERT_MAIL_TO:-root}" 2>> "${error_log}"
  echo -e "\n---------------------------------------------------------\n" >> "${error_log}"
}

#######################################
# Handler for "query failed but the mysqld service looks healthy":
# wait 30s and probe again. If the probe still fails, set the server
# read-only, kill running client statements, stop keepalived and send a
# mail alert.
# Globals:   mysql_con (read)     - mysql client command prefix
#            error_log (read)     - file all log lines and errors are appended to
#            ALERT_MAIL_TO (read) - optional alert recipient; defaults to root
# Arguments: none
#######################################
function query_error {
  local ts_fmt="+%F %H:%M:%S"

  echo -e "$(date "${ts_fmt}") -----query error, but mysql service ok, retry after 30s-----" >> "${error_log}"
  sleep 30

  if excute_query; then
    echo -e "$(date "${ts_fmt}") -----query ok after 30s-----" >> "${error_log}"
    echo -e "\n---------------------------------------------------------\n" >> "${error_log}"
    return
  fi

  echo -e "$(date "${ts_fmt}") -----still can't execute query-----" >> "${error_log}"

  # Set read_only on DB1.
  # The original ran the literal command name "mysql_con" here and in the two
  # calls below, which fails with "command not found". The variable must be
  # expanded, unquoted on purpose, because it holds the command plus options.
  echo -e "$(date "${ts_fmt}") -----set read_only = 1 on DB1-----" >> "${error_log}"
  ${mysql_con} -e "set global read_only = 1;" 2>> "${error_log}"

  # Kill the current client statements.
  echo -e "$(date "${ts_fmt}") -----kill current client thread-----" >> "${error_log}"
  rm -f /tmp/kill.sql &> /dev/null
  # Batch-kill trick: have the server write one "kill <id>;" statement per
  # matching thread into a file, then replay that file.
  # NOTE(review): the list may include the id of the session running this
  # select itself - verify that the replay does not abort on it.
  ${mysql_con} -e 'select concat("kill ",id,";") from information_schema.PROCESSLIST where command="Query" or command="Execute" into outfile "/tmp/kill.sql";' 2>> "${error_log}"
  ${mysql_con} -e "source /tmp/kill.sql" 2>> "${error_log}"
  sleep 2 # give the kills time to take effect

  # Stop keepalived on this host.
  echo -e "$(date "${ts_fmt}") -----stop keepalived-----" >> "${error_log}"
  service keepalived stop &>> "${error_log}"
  # mail needs at least one recipient to send anything; the original call
  # passed none, so the alert was never delivered.
  echo "DB1 keepalived 已停止" \
    | mail -s "DB1 keepalived 已停止,请及时处理!" "${ALERT_MAIL_TO:-root}" 2>> "${error_log}"
  echo -e "\n---------------------------------------------------------\n" >> "${error_log}"
}

### Check entry point: run the probe query and escalate only when it fails.
if ! excute_query; then
  # Probe failed: choose the handler by whether the mysqld service reports healthy.
  if service mysqld status &> /dev/null; then
    query_error
  else
    service_error
  fi
fi

遇到问题

搭建过程中遇到一个问题,keepalived不执行vrrp_script的脚本,找了很多原因,最终解决方案:

  1. 添加脚本可执行权限:chmod +x /etc/keepalived/check_mysql.sh
  2. 在vrrp_script中添加user root