Environment
Refer to the notes on manually deploying the Ceph environment (luminous version).
Goals
- /dev/sdm is used as the system disk
- /dev/sdn is used as the journal SSD, to speed up journal writes and improve OSD performance
- /dev/sda ~ /dev/sdl: each SATA disk is configured as an independent RAID0 volume (6TB) and used as an OSD data disk
- /dev/sdn is split into 5GB partitions, one per OSD, each holding that OSD's journal
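Every script below begins with "source parameter". That file is not shown in this document; the following is a minimal sketch of what it could contain, based on the variables the scripts reference (R730xd_SYSDISK_DISK, R730xd_JOURNAL_DISK, cephosdhost, cephuuid). The values are assumptions that match the disk layout above and the host names that appear later.
# parameter - sourced by every script below; adjust the values to the actual environment
R730xd_SYSDISK_DISK="sdm"                                   # system disk on the PowerEdge R730xd
R730xd_JOURNAL_DISK="sdn"                                   # journal SSD
cephosdhost="cephsvr-128040 cephsvr-128214 cephsvr-128215"  # OSD hosts, space separated
cephuuid="xxxxxxx-5d4d-4d3a-a3b2-04e73eff4ccd"              # cluster fsid from ceph.conf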
Initialization
Initialize the journal disk
#!/bin/bash
# Initialize the journal SSD: create one 5GB GPT partition per OSD data disk,
# each partition named after the data disk it serves (sda, sdb, ...).
source parameter

info=`dmidecode -t system | grep "Product Name:" | awk -F':' '{print $2}' | sed s/\ //g`
case "$info" in
"PowerEdgeR730xd" )
    sysdisk="$R730xd_SYSDISK_DISK"
    jdisk="$R730xd_JOURNAL_DISK"
    # OSD data disks: every sdX device except the system disk and the journal disk
    osddisk=`awk '/sd/ {print $NF}' /proc/partitions | grep -v "$sysdisk" | grep -v "$jdisk" | grep "[[:alpha:]]$" | sort`
    ;;
*)
    echo "unsupported hardware model: $info"
    exit 1
    ;;
esac

# wipe the start of the journal disk and write a fresh GPT label
start=1
end=`expr $start + 5000`
dd if=/dev/zero of=/dev/"$jdisk" bs=1M count=20
#parted /dev/"$jdisk" align-check optimal 1
parted -s /dev/"$jdisk" mklabel gpt

# one 5000MB partition per OSD data disk, named after that disk
for disk in $osddisk
do
    parted /dev/"$jdisk" mkpart $disk xfs $start $end
    start=$end
    end=`expr $start + 5000`
done

# format every journal partition with xfs; the partition name shows up in the
# 5th column of parted print while the "File system" column is still empty
partition=`parted /dev/"$jdisk" print | awk '{if ($5 ~ "sd") print $1}'`
for p in $partition
do
    mkfs -t xfs /dev/"$jdisk""$p"
done
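A quick check that the journal partitions came out as intended (a sketch; sdn is assumed to be the journal disk, as in the layout above):
lsblk -o NAME,SIZE,FSTYPE,PARTLABEL /dev/sdn   # expect 12 xfs partitions of roughly 5GB, labelled sda..sdl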
Initialize the OSD disks
#!/bin/bash
# Initialize every OSD data disk: a single GPT partition covering the whole
# disk, formatted as xfs with 1024-byte inodes.
source parameter

info=`dmidecode -t system | grep "Product Name:" | awk -F':' '{print $2}' | sed s/\ //g`
case "$info" in
"PowerEdgeR730xd" )
    sysdisk="$R730xd_SYSDISK_DISK"
    jdisk="$R730xd_JOURNAL_DISK"
    # OSD data disks: every sdX device except the system disk and the journal disk
    osddisk=`awk '/sd/ {print $NF}' /proc/partitions | grep -v "$sysdisk" | grep -v "$jdisk" | grep "[[:alpha:]]$" | sort`
    ;;
*)
    echo "unsupported hardware model: $info"
    exit 1
    ;;
esac

for disk in $osddisk
do
    dd if=/dev/zero of=/dev/"$disk" bs=1M count=40
    parted -s /dev/"$disk" mklabel gpt
    parted /dev/"$disk" mkpart primary xfs 1 100%
    hdparm -z /dev/"$disk"          # force the kernel to re-read the new partition table
    mkfs.xfs -f -i size=1024 /dev/"$disk"1
done
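A similar check for the data disks (a sketch; sda through sdl are assumed, as in the layout above):
for d in /dev/sd[a-l]1; do blkid $d; done      # every partition should report TYPE="xfs"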
Partition information
Journal disk partition layout after initialization, for reference:
[root@cephsvr-128040 ceph]# parted /dev/sdn print
Model: DELL PERC H730 Mini (scsi)
Disk /dev/sdn: 106GB
Sector size (logical/physical): 512B/4096B
Partition Table: gpt
Disk Flags:
Number Start End Size File system Name Flags
1 1049kB 5001MB 5000MB xfs sda
2 5001MB 10.0GB 5001MB xfs sdb
3 10.0GB 15.0GB 5000MB xfs sdc
4 15.0GB 20.0GB 5000MB xfs sdd
5 20.0GB 25.0GB 5001MB xfs sde
6 25.0GB 30.0GB 5000MB xfs sdf
7 30.0GB 35.0GB 5001MB xfs sdg
8 35.0GB 40.0GB 5000MB xfs sdh
9 40.0GB 45.0GB 5000MB xfs sdi
10 45.0GB 50.0GB 5001MB xfs sdj
11 50.0GB 55.0GB 5000MB xfs sdk
12 55.0GB 60.0GB 5000MB xfs sdl
OSD disk layout after partitioning and formatting, for reference:
[root@cephsvr-128040 ceph]# parted /dev/sdj print
Model: ATA TOSHIBA MG04ACA6 (scsi)
Disk /dev/sdj: 6001GB
Sector size (logical/physical): 512B/4096B
Partition Table: gpt
Disk Flags:
Number Start End Size File system Name Flags
1 1049kB 6001GB 6001GB xfs primary
Create the OSDs
#!/bin/bash
# Create one filestore OSD per data disk on every OSD host. Journal partitions
# are mounted under /var/lib/ceph/journal/ceph-$osdnum, data partitions under
# /var/lib/ceph/osd/ceph-$osdnum.
source parameter

info=`dmidecode -t system | grep "Product Name:" | awk -F':' '{print $2}' | sed s/\ //g`
case "$info" in
"PowerEdgeR730xd" )
    sysdisk="$R730xd_SYSDISK_DISK"
    jdisk="$R730xd_JOURNAL_DISK"
    ;;
*)
    echo "unsupported hardware model: $info"
    exit 1
    ;;
esac

osdnum=0
for host in $cephosdhost
do
    # detect the disk layout on the remote host (every host is expected to be
    # the same R730xd model, with the same system and journal disk names)
    info=`ssh $host "dmidecode -t system" | grep "Product Name:" | awk -F':' '{print $2}' | sed s/\ //g`
    case "$info" in
    "PowerEdgeR730xd" )
        osddisk=`ssh $host "cat /proc/partitions" | awk '/sd/ {print $NF}' | grep -v "$sysdisk" | grep -v "$jdisk" | grep "[[:alpha:]]$" | sort`
        ;;
    *)
        echo "unsupported hardware model on $host: $info"
        exit 1
        ;;
    esac

    # one CRUSH host bucket per OSD host, placed under root=default
    ceph osd crush add-bucket $host host
    ceph osd crush move $host root=default

    for disk in $osddisk
    do
        # journal partition on the SSD whose GPT name matches this data disk
        partition=`ssh $host "parted /dev/$jdisk print" | awk -v disk=$disk '{if ($6 ~ disk) print $1}'`
        # echo -e "[osd.$osdnum]\n\tosd_journal = /var/lib/ceph/journal/ceph-$osdnum" >> /etc/ceph/ceph.conf
        ssh $host "mkdir -p /var/lib/ceph/journal/ceph-$osdnum"
        ssh $host "echo /dev/$jdisk$partition /var/lib/ceph/journal/ceph-$osdnum xfs defaults 0 0 >> /etc/fstab"
        ssh $host "mount /dev/$jdisk$partition /var/lib/ceph/journal/ceph-$osdnum"
        ssh $host "mkdir -p /var/lib/ceph/osd/ceph-$osdnum"
        ssh $host "echo /dev/${disk}1 /var/lib/ceph/osd/ceph-$osdnum xfs defaults 0 0 >> /etc/fstab"
        ssh $host "mount /dev/${disk}1 /var/lib/ceph/osd/ceph-$osdnum"
        # allocate the next OSD id; the script assumes the ids returned by
        # "ceph osd create" are handed out sequentially and match $osdnum
        ceph osd create
        ssh $host "ceph-osd -i $osdnum --mkjournal --osd-uuid $cephuuid"
        ssh $host "ceph-osd -i $osdnum --mkfs --mkkey --osd-uuid $cephuuid"
        ssh $host "ceph auth add osd.$osdnum osd 'allow *' mon 'allow profile osd' -i /var/lib/ceph/osd/ceph-$osdnum/keyring"
        # CRUSH weight 6.0 for a 6TB data disk
        ceph osd crush add osd.$osdnum 6.0 host=$host
        ssh $host "cp /usr/lib/systemd/system/ceph-osd@.service /usr/lib/systemd/system/ceph-osd@$osdnum.service"
        let osdnum++
    done
    ssh $host "chown ceph:ceph -R /var/lib/ceph"
done
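The script relies on the ids handed out by "ceph osd create" being sequential and equal to osdnum. A slightly more defensive variant (a sketch, not part of the original script) captures the id the cluster actually returns and uses that:
# hypothetical variant of the loop body: take the id from the cluster instead of a local counter
osdnum=`ceph osd create`
ssh $host "ceph-osd -i $osdnum --mkjournal --osd-uuid $cephuuid"
ssh $host "ceph-osd -i $osdnum --mkfs --mkkey --osd-uuid $cephuuid"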
Resulting configuration after running the script:
ceph osd tree
[root@cephsvr-128040 ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 216.00000 root default
-2 72.00000 host cephsvr-128040
0 6.00000 osd.0 down 0 1.00000
1 6.00000 osd.1 down 0 1.00000
2 6.00000 osd.2 down 0 1.00000
3 6.00000 osd.3 down 0 1.00000
4 6.00000 osd.4 down 0 1.00000
5 6.00000 osd.5 down 0 1.00000
6 6.00000 osd.6 down 0 1.00000
7 6.00000 osd.7 down 0 1.00000
8 6.00000 osd.8 down 0 1.00000
9 6.00000 osd.9 down 0 1.00000
10 6.00000 osd.10 down 0 1.00000
11 6.00000 osd.11 down 0 1.00000
-3 72.00000 host cephsvr-128214
12 6.00000 osd.12 down 0 1.00000
13 6.00000 osd.13 down 0 1.00000
14 6.00000 osd.14 down 0 1.00000
15 6.00000 osd.15 down 0 1.00000
16 6.00000 osd.16 down 0 1.00000
17 6.00000 osd.17 down 0 1.00000
18 6.00000 osd.18 down 0 1.00000
19 6.00000 osd.19 down 0 1.00000
20 6.00000 osd.20 down 0 1.00000
21 6.00000 osd.21 down 0 1.00000
22 6.00000 osd.22 down 0 1.00000
23 6.00000 osd.23 down 0 1.00000
-4 72.00000 host cephsvr-128215
24 6.00000 osd.24 down 0 1.00000
25 6.00000 osd.25 down 0 1.00000
26 6.00000 osd.26 down 0 1.00000
27 6.00000 osd.27 down 0 1.00000
28 6.00000 osd.28 down 0 1.00000
29 6.00000 osd.29 down 0 1.00000
30 6.00000 osd.30 down 0 1.00000
31 6.00000 osd.31 down 0 1.00000
32 6.00000 osd.32 down 0 1.00000
33 6.00000 osd.33 down 0 1.00000
34 6.00000 osd.34 down 0 1.00000
35 6.00000 osd.35 down 0 1.00000
Cluster status for reference:
[root@cephsvr-128040 ceph]# ceph -s
cluster:
id: xxxxxxx-5d4d-4d3a-a3b2-04e73eff4ccd
health: HEALTH_WARN
no active mgr
services:
mon: 3 daemons, quorum cephsvr-128040,cephsvr-128214,cephsvr-128215
mgr: no daemons active
osd: 36 osds: 0 up, 0 in <- all 36 OSDs are already registered
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 bytes
usage: 0 kB used, 0 kB / 0 kB avail
pgs:
Start the OSDs
for i in `seq 0 11`; do ssh cephsvr-128040 "systemctl start ceph-osd@$i"; done
for i in `seq 12 23`; do ssh cephsvr-128214 "systemctl start ceph-osd@$i"; done
for i in `seq 24 35`; do ssh cephsvr-128215 "systemctl start ceph-osd@$i"; done
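These commands only start the OSDs for the current boot; to have them come back automatically after a reboot, the same loops can be reused with systemctl enable (a sketch):
for i in `seq 0 11`; do ssh cephsvr-128040 "systemctl enable ceph-osd@$i"; done
for i in `seq 12 23`; do ssh cephsvr-128214 "systemctl enable ceph-osd@$i"; done
for i in `seq 24 35`; do ssh cephsvr-128215 "systemctl enable ceph-osd@$i"; done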
Status after starting
Cluster status after the OSDs are started, for reference:
[root@cephsvr-128040 ceph]# ceph -s
cluster:
id: xxxxxxx-5d4d-4d3a-a3b2-04e73eff4ccd
health: HEALTH_WARN
no active mgr
services:
mon: 3 daemons, quorum cephsvr-128040,cephsvr-128214,cephsvr-128215
mgr: no daemons active
osd: 36 osds: 36 up, 36 in <- all OSDs started successfully
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 bytes
usage: 0 kB used, 0 kB / 0 kB avail
pgs:
Create a pool
ceph osd pool create volumes 2048 2048
ceph auth get-or-create client.cinder mon 'profile rbd' osd 'profile rbd pool=volumes'
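On luminous, a newly created pool should also be tagged with the application that will use it, otherwise the cluster will later warn that no application is enabled on it. Since volumes is used for RBD images here (client.cinder and the rbd tests below), the likely follow-up is:
ceph osd pool application enable volumes rbd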
Verify
[root@cephsvr-128040 ~]# ceph osd pool get volumes size
size: 3
[root@cephsvr-128040 ~]# ceph osd pool get volumes pg_num
pg_num: 2048
[root@cephsvr-128040 ~]# ceph osd pool get volumes crush_rule
crush_rule: replicated_rule
Create a test image
[root@cephsvr-128040 ~]# rbd create --size 1024 volumes/bar
[root@cephsvr-128040 ~]# rbd -p volumes ls
bar
[root@cephsvr-128040 ~]# rbd -p volumes info bar
warning: line 18: 'osd_journal_size' in section 'global' redefined
rbd image 'bar':
size 1024 MB in 256 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.621374b0dc51
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
flags:
create_timestamp: Mon Nov 20 17:36:12 2017
Notes
ceph -s output for reference:
[root@cephsvr-128040 ceph]# ceph -s
cluster:
id: xxxxxxx-5d4d-4d3a-a3b2-04e73eff4ccd
health: HEALTH_WARN
no active mgr
services:
mon: 3 daemons, quorum cephsvr-128040,cephsvr-128214,cephsvr-128215
mgr: no daemons active
osd: 36 osds: 36 up, 36 in
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 bytes
usage: 0 kB used, 0 kB / 0 kB avail
pgs:
Explanation
- The HEALTH_WARN is raised because there is no active mgr daemon.
- The data section shows all zeros even though the pool volumes has already been created and data has been written into it.
Both symptoms come from the mgr daemon not having been started.
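Both go away once a mgr daemon is running. A minimal sketch of starting one by hand on one of the mon hosts (the host name is taken from this cluster, the keyring path follows the standard /var/lib/ceph layout):
# create a keyring for mgr.cephsvr-128040 and start the daemon on that host
mkdir -p /var/lib/ceph/mgr/ceph-cephsvr-128040
ceph auth get-or-create mgr.cephsvr-128040 mon 'allow profile mgr' osd 'allow *' mds 'allow *' -o /var/lib/ceph/mgr/ceph-cephsvr-128040/keyring
chown -R ceph:ceph /var/lib/ceph/mgr
systemctl start ceph-mgr@cephsvr-128040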
Reverse operations
If a Ceph OSD needs to be removed (osd.0 is used as the example), the reference steps are listed below; a fuller sketch that also stops the daemon and releases the mounts follows the list.
- ceph osd out osd.0
- ceph osd crush remove osd.0
- ceph auth del osd.0
- ceph osd rm osd.0
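A minimal end-to-end sketch for the layout created above (the host, id, and mount points follow the creation script; adjust them as needed):
#!/bin/bash
# sketch: fully remove osd.0 from the cluster and release its mounts on the host
host=cephsvr-128040
id=0
ceph osd out osd.$id
ssh $host "systemctl stop ceph-osd@$id"
ceph osd crush remove osd.$id
ceph auth del osd.$id
ceph osd rm osd.$id
# unmount the data and journal directories created by the deployment script;
# the matching /etc/fstab entries for ceph-$id should be removed by hand as well
ssh $host "umount /var/lib/ceph/osd/ceph-$id /var/lib/ceph/journal/ceph-$id"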