Installing a Slurm cluster on CentOS 7

时间:2021-01-25 00:28:18
//Slurm installation notes
//Target OS: CentOS 7
//192.168.159.141 node01 (controller + compute node; steps tagged #node01 run here only)
//192.168.159.142 node02 (compute node)
# --- Base system preparation (run on every node) ---
# Turn off firewalld and NetworkManager, both now and at boot.
systemctl stop firewalld.service
systemctl disable firewalld.service
systemctl disable NetworkManager
systemctl stop NetworkManager
# Persistently disable SELinux. Edit /etc/selinux/config directly:
# /etc/sysconfig/selinux is only a symlink to it, and `sed -i` would replace
# the symlink with a regular file and leave the real config untouched.
# Match the SELINUX= key instead of relying on a fixed line number.
sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
getenforce # "Disabled" -> continue; anything else -> reboot later so the config change takes effect
setenforce 0 # switch to permissive mode for the current boot
yum -y update
# --- Remove leftovers and create the munge/slurm accounts (run on every node) ---
yum -y remove mariadb-server mariadb-devel
# Show any existing slurm account; the userdel calls below report an error
# (harmlessly) when the account does not exist.
grep slurm /etc/passwd
userdel -r slurm
userdel -r munge
yum -y install mariadb-server mariadb-devel
# Fixed numeric IDs are used so that the accounts get the same UID/GID on
# every node, as Slurm and MUNGE expect.
export MUNGEUSER=1216
# The group must be created first; the user options (-m -c -d -u -s) belong
# to useradd, not groupadd.
groupadd -g "$MUNGEUSER" munge
useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u "$MUNGEUSER" -g munge -s /sbin/nologin munge
export SLURMUSER=1217
groupadd -g "$SLURMUSER" slurm
useradd -m -c "SLURM workload manger" -d /var/lib/slurm -u "$SLURMUSER" -g slurm -s /bin/bash slurm
# --- MUNGE authentication (lines tagged #node01 run on node01 only) ---
yum -y install epel-release
yum -y install munge munge-libs munge-devel
yum -y install rng-tools #node01
rngd -r /dev/urandom #node01
/usr/sbin/create-munge-key -r #node01
# The dd below overwrites the key generated above with 1024 bytes read from
# /dev/urandom.
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key #node01
chown munge: /etc/munge/munge.key #node01
chmod 400 /etc/munge/munge.key #node01
# Every node must hold the same key, so copy it to node02.
scp /etc/munge/munge.key root@node02:/etc/munge #node01
# Run on every node (on node02: after the key has been copied over).
chown -R munge: /etc/munge/ /var/log/munge/
chmod 0700 /etc/munge/ /var/log/munge/
# munged has to be running before the credential checks below can succeed.
systemctl enable munge.service
systemctl start munge.service
munge -n
munge -n | unmunge
# Cross-node check: encode locally, decode on the other node.
munge -n | ssh node02 unmunge #node01 (when on node02 use: ssh node01)
remunge
# --- Build the Slurm RPMs from the source tarball and install them ---
yum -y install openssl openssl-devel pam-devel numactl numactl-devel hwloc hwloc-devel lua lua-devel readline-devel rrdtool-devel ncurses-devel man2html libmad libibumad
# NOTE(review): /nfs is presumably a directory shared with node02 so the
# built RPMs can be installed there too -- confirm.
mkdir -p /nfs/slurm-rpms
cd /nfs
# The tarball must already be in /nfs, and its name must match the rpmbuild
# line below.
# NOTE(review): confirm the download URL and that this version is still published.
### wget https://download.schedmd.com/slurm/slurm-16.08.9.tar.bz2
yum -y install rpm-build
rpmbuild -ta slurm-16.08.9.tar.bz2
cd /root/rpmbuild/RPMS/x86_64
cp -p *.rpm /nfs/slurm-rpms
# "localinstall" is the yum sub-command; options go before it.
yum -y --nogpgcheck localinstall *.rpm
# --- Create slurm.conf from the shipped example (node01) ---
cd /etc/slurm ; cp -p slurm.conf.example slurm.conf #node01
# In the editor, set the following keys, then save and quit with :wq
vim slurm.conf #node01
ControlMachine=node01
ControlAddr=node01
StateSaveLocation=/var/spool/slurmctld
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdLogFile=/var/log/slurmd.log
NodeName=node01 CPUs=1 State=UNKNOWN
NodeName=node02 CPUs=1 State=UNKNOWN
PartitionName=debug Nodes=node01,node02 Default=YES MaxTime=INFINITE State=UP
:wq
# Every node needs the same slurm.conf, so copy it to node02.
scp /etc/slurm/slurm.conf root@node02:/etc/slurm/ #node01
# --- Spool directories and log files, owned by the slurm user ---
# Controller state directory (the StateSaveLocation set in slurm.conf).
mkdir -p /var/spool/slurmctld
chown slurm: /var/spool/slurmctld
chmod 755 /var/spool/slurmctld
touch /var/log/slurmctld.log
chown slurm: /var/log/slurmctld.log
touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
# slurmd spool directory and log file.
mkdir -p /var/spool/slurmd
chown slurm: /var/spool/slurmd
chmod 755 /var/spool/slurmd
touch /var/log/slurmd.log
chown slurm: /var/log/slurmd.log
# Print the hardware configuration slurmd detects on this node; compare it
# with the NodeName lines in slurm.conf.
slurmd -C
# --- Time sync and service start-up ---
systemctl stop firewalld.service
systemctl disable firewalld.service
# Keep the node clocks in sync: one-shot ntpdate first, then the ntpd daemon.
yum -y install ntp
systemctl enable ntpd
ntpdate pool.ntp.org
systemctl start ntpd
# Start munge first: the Slurm daemons use it for authentication.
systemctl enable munge.service
systemctl start munge.service
systemctl status munge.service
# Controller daemon, node01 only.
systemctl enable slurmctld.service #node01
systemctl start slurmctld.service #node01
systemctl status slurmctld.service #node01
# Compute daemon, every node.
systemctl enable slurmd.service
systemctl start slurmd.service
systemctl status slurmd.service
# Verify that the nodes have registered with the controller.
scontrol show nodes
sinfo --Node
 
 -----------------------Questions? Contact me: QQ 1394466404----------------------------