假设主服务器宕机,哨兵1先检测到这个结果,系统并不会马上进行failover(故障转移),此时仅仅是哨兵1主观认为主服务器不可用,此现象称之为主观下线(sdown).当其他哨兵也检测到主服务器不可用,并且认为其不可用的哨兵数量达到配置的quorum值时,主服务器被判定为客观下线(odown).此后哨兵之间会进行一次投票,投票由其中一个哨兵发起,最终选出一个leader(哨兵节点)来执行故障转移.切换成功后,就会通过发布订阅模式通知其他哨兵,让各个哨兵更新自己监控的主从信息,完成主从切换.
2、搭建步骤
说明:提前搭建好主从复制环境.
2.1、主从环境
--主从环境确认如下.
[root@leo-redis626-a data]# redis-cli -p 6379
127.0.0.1:6379> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=127.0.0.1,port=6380,state=online,offset=224,lag=1
slave1:ip=127.0.0.1,port=6381,state=online,offset=224,lag=1
master_failover_state:no-failover
master_replid:5a9b5f18b18446393fce7c4e09d482607c2917d1
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:224
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:224
2.2、创建哨兵文件
--新建sentinel-26379.conf文件
[root@leo-redis626-a redis-6.2.6]# pwd
/usr/local/src/redis-6.2.6
[root@leo-redis626-a redis-6.2.6]# vi sentinel-26379.conf
添加如下:
port 26379
dir "/redis/data"
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 5000
--新建sentinel-26380.conf文件
[root@leo-redis626-a redis-6.2.6]# vi sentinel-26380.conf
添加如下:
port 26380
dir "/redis/data"
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 5000
--新建sentinel-26381.conf文件
[root@leo-redis626-a redis-6.2.6]# vi sentinel-26381.conf
添加如下:
port 26381
dir "/redis/data"
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 5000
2.3、启动哨兵
--启动sentinel 1
[root@leo-redis626-a redis-6.2.6]# redis-sentinel sentinel-26379.conf
--日志信息
5026:X 06 Apr 2023 10:17:04.491 # Sentinel ID is bc44243483181667d254b7c40bbdb444907b4efd
5026:X 06 Apr 2023 10:17:04.491 # +monitor master mymaster 127.0.0.1 6379 quorum 2
5026:X 06 Apr 2023 10:17:04.492 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5026:X 06 Apr 2023 10:17:04.493 * +slave slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5026:X 06 Apr 2023 10:17:27.400 * +sentinel sentinel a29e697c218031a88d4dd5e9504ba7ac85b3c1e8 127.0.0.1 26380 @ mymaster 127.0.0.1 6379
5026:X 06 Apr 2023 10:17:46.840 * +sentinel sentinel 301207118767e46277a5c42e94edeed277d6bb4e 127.0.0.1 26381 @ mymaster 127.0.0.1 6379
--启动sentinel 2
[root@leo-redis626-a redis-6.2.6]# redis-sentinel sentinel-26380.conf
--日志信息
5033:X 06 Apr 2023 10:17:25.325 # Sentinel ID is a29e697c218031a88d4dd5e9504ba7ac85b3c1e8
5033:X 06 Apr 2023 10:17:25.325 # +monitor master mymaster 127.0.0.1 6379 quorum 2
5033:X 06 Apr 2023 10:17:25.326 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5033:X 06 Apr 2023 10:17:25.327 * +slave slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5033:X 06 Apr 2023 10:17:26.873 * +sentinel sentinel bc44243483181667d254b7c40bbdb444907b4efd 127.0.0.1 26379 @ mymaster 127.0.0.1 6379
5033:X 06 Apr 2023 10:17:46.840 * +sentinel sentinel 301207118767e46277a5c42e94edeed277d6bb4e 127.0.0.1 26381 @ mymaster 127.0.0.1 6379
--启动sentinel 3
[root@leo-redis626-a redis-6.2.6]# redis-sentinel sentinel-26381.conf
--日志信息
5048:X 06 Apr 2023 10:17:44.767 # Sentinel ID is 301207118767e46277a5c42e94edeed277d6bb4e
5048:X 06 Apr 2023 10:17:44.767 # +monitor master mymaster 127.0.0.1 6379 quorum 2
5048:X 06 Apr 2023 10:17:44.768 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:17:44.770 * +slave slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:17:45.191 * +sentinel sentinel bc44243483181667d254b7c40bbdb444907b4efd 127.0.0.1 26379 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:17:45.838 * +sentinel sentinel a29e697c218031a88d4dd5e9504ba7ac85b3c1e8 127.0.0.1 26380 @ mymaster 127.0.0.1 6379
2.4、哨兵节点状态
[root@leo-redis626-a data]# redis-cli -p 26379
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=127.0.0.1:6379,slaves=2,sentinels=3
[root@leo-redis626-b redis-6.2.6]# ps -ef|grep redis
avahi 857 1 0 22:00 ? 00:00:00 avahi-daemon: running [leo-redis626-b.local]
root 2381 2273 0 22:05 pts/1 00:00:00 tail -5000f redis.log
root 2392 2313 0 22:05 pts/2 00:00:00 tail -5000f redis26380.log
root 2411 1 0 22:07 ? 00:00:02 redis-server 127.0.0.1:6380
root 2599 1 0 22:24 ? 00:00:01 redis-sentinel *:26380 [sentinel]
root 2663 2227 0 22:30 pts/0 00:00:00 grep --color=auto redis
3、故障演示
3.1、kill主节点
说明:如下所示,当前master 端口为6379.
[root@leo-redis626-a ~]# redis-cli -p 26379
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=127.0.0.1:6379,slaves=2,sentinels=3
--使用kill命令杀掉master.
[root@leo-redis626-a data]# ps -ef|grep redis
avahi 841 1 0 08:32 ? 00:00:00 avahi-daemon: running [leo-redis626-a.local]
root 4376 3460 0 09:31 pts/1 00:00:13 redis-server *:6379
root 4395 4217 0 09:32 pts/2 00:00:07 redis-server *:6380
root 4404 4256 0 09:33 pts/3 00:00:07 redis-server *:6381
root 4457 4332 0 09:37 pts/5 00:00:00 redis-cli -p 6379
root 5026 4807 0 10:17 pts/6 00:00:02 redis-sentinel *:26379 [sentinel]
root 5033 4846 0 10:17 pts/7 00:00:01 redis-sentinel *:26380 [sentinel]
root 5048 4880 0 10:17 pts/8 00:00:02 redis-sentinel *:26381 [sentinel]
root 5108 4290 0 10:22 pts/4 00:00:00 grep --color=auto redis
[root@leo-redis626-a data]# kill -9 4376
3.2、日志信息
Sentinel1日志信息如下:
5026:X 06 Apr 2023 10:25:05.821 # +sdown master mymaster 127.0.0.1 6379
5026:X 06 Apr 2023 10:25:05.979 # +new-epoch 1
5026:X 06 Apr 2023 10:25:05.981 # +vote-for-leader 301207118767e46277a5c42e94edeed277d6bb4e 1
5026:X 06 Apr 2023 10:25:06.940 # +odown master mymaster 127.0.0.1 6379 #quorum 3/2
5026:X 06 Apr 2023 10:25:06.940 # Next failover delay: I will not start a failover before Thu Apr 6 10:31:06 2023
5026:X 06 Apr 2023 10:25:07.097 # +config-update-from sentinel 301207118767e46277a5c42e94edeed277d6bb4e 127.0.0.1 26381 @ mymaster 127.0.0.1 6379 --从哨兵26381(leader)接收到新的配置,并据此更新本哨兵的配置.
5026:X 06 Apr 2023 10:25:07.097 # +switch-master mymaster 127.0.0.1 6379 127.0.0.1 6381
5026:X 06 Apr 2023 10:25:07.097 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6381
5026:X 06 Apr 2023 10:25:07.097 * +slave slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
5026:X 06 Apr 2023 10:25:12.132 # +sdown slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
Sentinel2日志信息如下:
5033:X 06 Apr 2023 10:25:05.883 # +sdown master mymaster 127.0.0.1 6379
5033:X 06 Apr 2023 10:25:05.979 # +new-epoch 1
5033:X 06 Apr 2023 10:25:05.981 # +vote-for-leader 301207118767e46277a5c42e94edeed277d6bb4e 1
5033:X 06 Apr 2023 10:25:05.990 # +odown master mymaster 127.0.0.1 6379 #quorum 3/2
5033:X 06 Apr 2023 10:25:05.990 # Next failover delay: I will not start a failover before Thu Apr 6 10:31:06 2023
5033:X 06 Apr 2023 10:25:07.097 # +config-update-from sentinel 301207118767e46277a5c42e94edeed277d6bb4e 127.0.0.1 26381 @ mymaster 127.0.0.1 6379
5033:X 06 Apr 2023 10:25:07.097 # +switch-master mymaster 127.0.0.1 6379 127.0.0.1 6381
5033:X 06 Apr 2023 10:25:07.097 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6381
5033:X 06 Apr 2023 10:25:07.097 * +slave slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
5033:X 06 Apr 2023 10:25:12.117 # +sdown slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
Sentinel3日志信息如下:
5048:X 06 Apr 2023 10:25:05.883 # +sdown master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:05.974 # +odown master mymaster 127.0.0.1 6379 #quorum 3/2
5048:X 06 Apr 2023 10:25:05.974 # +new-epoch 1
5048:X 06 Apr 2023 10:25:05.974 # +try-failover master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:05.978 # +vote-for-leader 301207118767e46277a5c42e94edeed277d6bb4e 1
5048:X 06 Apr 2023 10:25:05.981 # bc44243483181667d254b7c40bbdb444907b4efd voted for 301207118767e46277a5c42e94edeed277d6bb4e 1
5048:X 06 Apr 2023 10:25:05.981 # a29e697c218031a88d4dd5e9504ba7ac85b3c1e8 voted for 301207118767e46277a5c42e94edeed277d6bb4e 1
5048:X 06 Apr 2023 10:25:06.050 # +elected-leader master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:06.051 # +failover-state-select-slave master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:06.126 # +selected-slave slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:06.126 * +failover-state-send-slaveof-noone slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:06.187 * +failover-state-wait-promotion slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:07.024 # +promoted-slave slave 127.0.0.1:6381 127.0.0.1 6381 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:07.024 # +failover-state-reconf-slaves master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:07.094 * +slave-reconf-sent slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:08.043 * +slave-reconf-inprog slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:08.043 * +slave-reconf-done slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:08.144 # -odown master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:08.144 # +failover-end master mymaster 127.0.0.1 6379
5048:X 06 Apr 2023 10:25:08.144 # +switch-master mymaster 127.0.0.1 6379 127.0.0.1 6381
5048:X 06 Apr 2023 10:25:08.144 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ mymaster 127.0.0.1 6381
5048:X 06 Apr 2023 10:25:08.144 * +slave slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
5048:X 06 Apr 2023 10:25:13.222 # +sdown slave 127.0.0.1:6379 127.0.0.1 6379 @ mymaster 127.0.0.1 6381
3.3、验证数据
--验证切换信息
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=127.0.0.1:6381,slaves=2,sentinels=3
127.0.0.1:26380> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=127.0.0.1:6381,slaves=2,sentinels=3
[root@leo-redis626-a ~]# redis-cli -p 6380
127.0.0.1:6380> info replication
# Replication
role:slave
master_host:127.0.0.1
master_port:6381
master_link_status:up
master_last_io_seconds_ago:0
master_sync_in_progress:0
slave_read_repl_offset:136488
slave_repl_offset:136488
slave_priority:100
slave_read_only:1
replica_announced:1
connected_slaves:0
master_failover_state:no-failover
master_replid:d3458227e4ed18291dd80d2bff58b1abc403c1b5
master_replid2:5a9b5f18b18446393fce7c4e09d482607c2917d1
master_repl_offset:136488
second_repl_offset:92961
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:136488
说明:如上所示,redis主从切换成功,6381成为新的主节点.哨兵系统中的主从节点与普通的主从节点并无区别,故障的发现和转移都是由哨兵来控制和完成的.哨兵节点本质上是运行在特殊模式下的redis节点.每个哨兵节点只需要配置所监控的主节点,便可以自动发现其他的哨兵节点和从节点.在哨兵节点启动和故障转移阶段,各个节点的配置文件会被重写(config rewrite).
参考网址:https://www.bilibili.com/video/BV18K4y197w1/?vd_source=8c872e2fd1d99229b38a73ed6718b776