author:skate
time:2013/12/09
lvs+ldirectord+pacemaker+corosync+mysql实现高可用负载均衡(一)
corosync和pacemaker安装配置高可用
1. 在所有节点上安装pacemaker和corosync和crmsh
yum方式安装
# yum install pacemaker* corosync*
检测是否安装成功
[root@localhost mysqlinstall]# service corosync status
corosync is stopped
[root@localhost mysqlinstall]# service pacemaker status
pacemakerd is stopped
[root@localhost mysqlinstall]#
或
[root@master mysqlinstall]# ps auxf | grep pacemaker
root 5264 0.0 0.0 103240 832 pts/0 S+ 00:16 0:00 | \_ grep pacemaker
root 10400 0.1 0.1 73488 2844 ? S Nov29 16:42 /usr/libexec/pacemaker/lrmd
189 10402 0.0 1.4 121908 27448 ? S Nov29 8:00 /usr/libexec/pacemaker/pengine
189 3105 0.0 0.5 94412 11132 ? S Dec05 0:49 \_ /usr/libexec/pacemaker/cib
root 3106 0.0 0.2 94508 4060 ? S Dec05 0:33 \_ /usr/libexec/pacemaker/stonithd
189 3108 0.0 0.1 89388 3080 ? S Dec05 0:35 \_ /usr/libexec/pacemaker/attrd
189 3110 0.0 0.6 145460 12328 ? S Dec05 0:33 \_ /usr/libexec/pacemaker/crmd
[root@master mysqlinstall]# ps auxf | grep corosync
root 5422 0.0 0.0 103240 832 pts/0 S+ 00:17 0:00 | \_ grep corosync
root 3099 0.1 0.3 690528 7580 ? Ssl Dec05 9:46 corosync
crmsh工具下载安装
# wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/x86_64/crmsh-1.2.6-5.1.x86_64.rpm
安装crmsh需要依赖pssh和python-lxml
# rpm -ivh pssh-2.3.1-3.2.x86_64.rpm
# yum install python-lxml
# rpm -ivh crmsh-1.2.6-5.1.x86_64.rpm
检测crmsh是否安装成功
[root@localhost mysqlinstall]# crm --version
1.2.6-5.1 (Build 4f66cc1901854a7cb60d483414ce5cf2d749db60)
[root@localhost mysqlinstall]#
2. 所有节点前提准备
1) 各节点之间主机名互相解析
[root@slave92 mysqlinstall]# more /etc/hosts
192.168.213.91 master
192.168.213.92 slave92
2) 各节点之间时间同步
[root@slave92 mysqlinstall]# crontab -e
#ntp
*/10 * * * * /usr/sbin/ntpdate 10.10.10.70 > /dev/null 2>&1
3) 各节点之间ssh互信
[root@slave92 mysqlinstall]# more login_nopass.sh
#!/bin/bash
# login_nopass.sh - enable password-less root SSH login from this host to
# every host named on the command line.
# usage: sh login_nopass.sh HOST [HOST...]
# env:   LOGIN_NOPASS_KEY_TYPE  key type to generate/push (default: dsa)
# NOTE: DSA keys are disabled by default since OpenSSH 7.0; on newer systems
#       run with LOGIN_NOPASS_KEY_TYPE=rsa (or ed25519).
# by: zxg
# time:2012/10/12

# push_key PUBKEY HOST
# Copy PUBKEY to HOST and append it to root's authorized_keys there.
# Returns non-zero as soon as the copy or the append fails, so a failed
# copy is never followed by an append of a missing or stale file.
push_key() {
  local pubkey=$1 host=$2
  local remote_tmp="/tmp/${pubkey##*/}"

  scp "$pubkey" "root@$host:/tmp/" || return 1
  echo "copied"
  # chmod 700/600: with StrictModes enabled, sshd ignores authorized_keys
  # when ~/.ssh or the file itself is writable by group or others.
  ssh "root@$host" "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat $remote_tmp >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" || return 1
  echo "attaphed"
  echo "$host is done!"
  # The key is already installed at this point; a failed cleanup is only
  # worth a warning.
  ssh "root@$host" "rm -f $remote_tmp" ||
    echo "warning: could not remove $remote_tmp on $host" >&2
  return 0
}

# main HOST...
# Generate the local key pair once if it is missing, then push the public
# key to each HOST. Keeps going after a failed host and returns 1 if any
# host failed, 0 otherwise.
main() {
  local key_type=${LOGIN_NOPASS_KEY_TYPE:-dsa}
  local pubkey="/root/.ssh/id_${key_type}.pub"
  local host
  local failed=0

  if [ "$#" -eq 0 ]; then
    echo "usage: login_nopass.sh HOST [HOST...]" >&2
    return 0
  fi

  # Loop-invariant: the key pair only has to be created once.
  if [ ! -f "$pubkey" ]; then
    ssh-keygen -t "$key_type" || return 1
  fi

  for host in "$@"; do
    if ! push_key "$pubkey" "$host"; then
      echo "$host failed!" >&2
      failed=1
    fi
  done
  return "$failed"
}

main "$@"
把这个脚本部署到所有的主机上,然后按如下方式执行
[root@slave92 mysqlinstall]# sh login_nopass.sh 192.168.213.91
[root@master mysqlinstall]# sh login_nopass.sh 192.168.213.92
4) 关闭防火墙与SELinux
# service iptables stop
# vi /etc/selinux/config
3. 详细配置
3.1 corosync配置
[root@master mysqlinstall]# ll /etc/corosync/
total 24
-r-------- 1 root root 128 Nov 20 23:54 authkey
-rw-r--r-- 1 root root 545 Nov 21 20:43 corosync.conf
-rw-r--r-- 1 root root 445 May 15 2013 corosync.conf.example
-rw-r--r-- 1 root root 1084 May 15 2013 corosync.conf.example.udpu
drwxr-xr-x 2 root root 4096 May 15 2013 service.d
drwxr-xr-x 2 root root 4096 May 15 2013 uidgid.d
[root@master mysqlinstall]#
可以看到corosync提供一个样例文件corosync.conf.example
[root@master mysqlinstall]# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf
[root@master mysqlinstall]# more /etc/corosync/corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank //兼容老版本
totem { //集群节点间通信协议
version: 2 //版本号
secauth: off //是否开启安全认证功能
threads: 0 //多少线程
interface { //心跳信息
member {
memberaddr: 192.168.213.91 //在线管理节点
}
member {
memberaddr: 192.168.213.92
}
ringnumber: 0
bindnetaddr: 192.168.213.0 //绑定的网络,本地那个网卡
mcastport: 5405 //端口
ttl: 1 //TTL(生存时间),即心跳报文允许经过的最大路由跳数
}
transport: udpu
}
logging { //日志
fileline: off
to_logfile: yes //是否记录日志文件中
to_syslog: yes //是否记录进系统日志
debug: on //是否开启调试信息
logfile: /var/log/cluster/corosync.log //日志位置
timestamp: on //是否记录时间
logger_subsys {
subsys: AMF
debug: off
}
}
(添加以下信息)
service {
ver: 0
name: pacemaker //使用pacemaker
}
用 man corosync.conf 可以查看所有选项的意思
注:corosync-keygen生成key文件时默认读取/dev/random随机数设备,一旦系统熵池中的随机数不够用,就会产生大量的等待时间;因此,为了节约时间,我们在生成key之前将random替换成urandom。key生成完成后,应把/dev/random恢复回来(rm /dev/random; mv /dev/random.bak /dev/random)。
[root@master mysqlinstall]# mv /dev/{random,random.bak}
[root@master mysqlinstall]# ln -s /dev/urandom /dev/random
[root@master mysqlinstall]# cd /etc/corosync
[root@master mysqlinstall]# corosync-keygen
查看生成的key文件“authkey”
[root@master mysqlinstall]# ll /etc/corosync/
total 24
-r-------- 1 root root 128 Nov 20 23:54 authkey
-rw-r--r-- 1 root root 545 Nov 21 20:43 corosync.conf
-rw-r--r-- 1 root root 445 May 15 2013 corosync.conf.example
-rw-r--r-- 1 root root 1084 May 15 2013 corosync.conf.example.udpu
drwxr-xr-x 2 root root 4096 May 15 2013 service.d
drwxr-xr-x 2 root root 4096 May 15 2013 uidgid.d
把文件authkey和corosync.conf复制到slave92上(corosync集群添加新的节点,也是把authkey和corosync.conf复制到新的节点并修改所有节点上的corosync.conf,如果要在线维护,一定要在资源的维护模式下操作)。
corosync到这里就全部配置完了,现在启动看看corosync是否正常
3.2 启动所有节点的corosync
[root@master mysqlinstall]# service corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@master mysqlinstall]# service corosync status
corosync (pid 22130) is running...
[root@master mysqlinstall]#
查看corosync启动信息
A. 查看corosync引擎是否正常启动
[root@master mysqlinstall]# grep -e "Corosync Cluster Engine" -e "configuration file" /var/log/cluster/corosync.log
Nov 28 02:05:49 corosync [MAIN ] Corosync Cluster Engine ('1.4.1'): started and ready to provide service.
Nov 28 02:05:49 corosync [MAIN ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
Dec 10 00:24:53 corosync [MAIN ] Corosync Cluster Engine exiting with status 0 at main.c:1858.
B. 查看初始化成员节点通知是否正常发出
[root@master mysqlinstall]# grep TOTEM /var/log/cluster/corosync.log | more
Nov 20 23:55:44 corosync [TOTEM ] Initializing transport (UDP/IP Unicast).
Nov 20 23:55:44 corosync [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Nov 20 23:55:45 corosync [TOTEM ] The network interface [192.168.213.91] is now up.
Nov 20 23:55:45 corosync [TOTEM ] adding new UDPU member {192.168.213.91}
Nov 20 23:55:45 corosync [TOTEM ] adding new UDPU member {192.168.213.92}
Nov 20 23:55:45 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
Nov 20 23:56:17 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
C. 检查启动过程中是否有错误产生
[root@master mysqlinstall]# grep ERROR: /var/log/cluster/corosync.log | more
Nov 20 23:55:45 corosync [pcmk ] ERROR: process_ais_conf: You have configured a cluster using the Pacemaker plugin for Corosync. The plugin is not supported in this environment and will be removed very soon.
Nov 20 23:55:45 corosync [pcmk ] ERROR: process_ais_conf: Please see Chapter 8 of 'Clusters from Scratch' (http://www.clusterlabs.org/doc) for details on using Pacemaker with CMAN
D. 查看pacemaker是否正常启动
grep pcmk_startup /var/log/cluster/corosync.log
查看集群状态
[root@master mysqlinstall]# crm_mon
Attempting connection to the cluster...
Last updated: Tue Dec 10 02:00:24 2013
Last change: Tue Dec 10 01:17:19 2013 via crm_attribute on master
Stack: classic openais (with plugin)
Current DC: slave92 - partition with quorum
Version: 1.1.10-1.el6_4.4-368c726
3 Nodes configured, 3 expected votes
4 Resources configured
Online: [ master slave92 ]
OFFLINE: [ slave93 ]
vip (ocf::heartbeat:IPaddr): Started master
Master/Slave Set: ms_MySQL [p_mysql]
Masters: [ master ]
Slaves: [ slave92 ]
vip_r (ocf::heartbeat:IPaddr): Started master
显示本集群可用节点
[root@master mysqlinstall]# crm_node -l
1540729024 master member
1557506240 slave92 member
上面看到这个集群管理了4个资源,corosync本身是不管理资源的,要借助pacemaker才可以管理资源
[root@master mysqlinstall]# crm
crm(live)# configure //进入配置模式
crm(live)configure# verify //验证配置文件
crm(live)configure# show //查看配置文件
crm(live)# status //查看资源状态
crm(live)resource# reprobe //刷新资源状态
eg:
crm(live)configure# show
node master \
attributes maintenance="off" standby="off"
node slave92 \
attributes standby="off" maintenance="off"
node slave93
primitive p_mysql ocf:heartbeat:mysql \
params config="/etc/my.cnf" pid="/mysql/data/mysql5528/data/master.pid" replication_user="rep" replication_passwd="rep" max_slave_lag="120" evict_outdated_slaves="false" binary="/usr/local/mysql/bin/mysqld_safe" user="root" test_user="root" test_passwd="root" socket="/mysql/tmp/mysql.sock" datadir="/mysql/data/mysql5528/data" \
op monitor interval="10s" timeout="30s" role="Master" OCF_CHECK_LEVEL="1" \
op monitor interval="30s" timeout="30s" role="Slave" OCF_CHECK_LEVEL="1" \
op start interval="0" timeout="120s" \
op stop interval="0" timeout="120s"
primitive vip ocf:heartbeat:IPaddr \
params ip="192.168.213.95" nic="eth0:1" \
meta target-role="Started" is-managed="true"
primitive vip_r ocf:heartbeat:IPaddr \
params ip="192.168.213.96" nic="eth0:2" \
meta target-role="Started" is-managed="true" resource-stickiness="100"
ms ms_MySQL p_mysql \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" globally-unique="false" target-role="Started" is-managed="true"
colocation vip_before_ms_MySQL_M 500: ms_MySQL:Master vip
order vip_mysql_1 Mandatory: ms_MySQL:promote vip
property $id="cib-bootstrap-options" \
dc-version="1.1.10-1.el6_4.4-368c726" \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes="3" \
stonith-enabled="false" \
no-quorum-policy="ignore" \
last-lrm-refresh="1385485315"
property $id="mysql_replication" \
p_mysql_REPL_INFO="master|mysql-bin.000234|107"
crm(live)configure#
如果没有STONITH设备,就要先关闭stonith-enabled这个属性 ,如下
crm(live)configure# property stonith-enabled=false
对于只有两节点的集群来讲,如果要满足集群服务本身正常运行的条件,就需要忽略quorum不能满足时的集群状态检查
crm(live)configure# property no-quorum-policy=ignore
需要修改两个文件
1.
[root@master mysqlinstall]# vi /etc/init.d/mysqld
....
....
# Set pid file if not given
#
if test -z "$mysqld_pid_file_path"
then
###by zxg modify
mysqld_pid_file_path=$datadir/master.pid
#mysqld_pid_file_path=$datadir/`hostname`.pid
else
case "$mysqld_pid_file_path" in
....
2.
[root@master mysqlinstall]# vi /usr/lib/ocf/resource.d/heartbeat/mysql
....
....
OCF_RESKEY_config_default="/etc/my.cnf"
OCF_RESKEY_datadir_default="/var/lib/mysql"
OCF_RESKEY_user_default="mysql"
OCF_RESKEY_group_default="mysql"
OCF_RESKEY_log_default="/var/log/mysqld.log"
###by zxg modify
#OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid"
OCF_RESKEY_pid_default="/mysql/data/mysql5528/data/master.pid"
OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
...
...
参考:
http://clusterlabs.org/
http://clusterlabs.org/doc/en-US/Pacemaker/1.0/html/Pacemaker_Explained/index.html
http://freeloda.blog.51cto.com/2033581/1274533
http://freeloda.blog.51cto.com/2033581/1272417
----end---