hadoop3.0高可用HA大数据平台架构硬件和部署方案(一)
http://blog.csdn.net/lxb1022/article/details/78389836
hadoop3.0高可用HA大数据平台架构软件和部署方案(二)
http://blog.csdn.net/lxb1022/article/details/78399462
1、下载spark-2.2.0-bin-hadoop2.7.tgz
https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
2、复制到安装目录/opt/,并解压
tar -zxvf spark-2.2.0-bin-hadoop2.7.tgz
3、配置/opt/spark-2.2.0-bin-hadoop2.7/conf/spark-env.sh,在文件末尾加入:
# --- Settings appended to conf/spark-env.sh for the standalone HA cluster ---

# Installation directories of the Java, Scala and Hadoop distributions.
export JAVA_HOME="/opt/jdk1.8.0_144"
export SCALA_HOME="/opt/scala-2.12.3"
export HADOOP_HOME="/opt/hadoop-3.0.0-beta1"

# Directory holding the Hadoop cluster configuration files.
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"

# Deliberately left disabled: with the HA setup below this must not be set.
#export SPARK_MASTER_HOST=namenode1

# Maximum amount of memory each worker node may hand out to executors.
export SPARK_WORKER_MEMORY="1g"
# Number of CPU cores each worker node occupies.
export SPARK_WORKER_CORES="1"
# Number of worker instances started on each machine.
export SPARK_WORKER_INSTANCES="1"

# Port of the master web UI.
export SPARK_MASTER_WEBUI_PORT="8090"

# High-availability (HA) configuration: ZooKeeper recovery mode, the
# ZooKeeper ensemble address list, and the ZooKeeper directory used by Spark.
SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=zookeeper1:2181,zookeeper2:2181,zookeeper3:2181 -Dspark.deploy.zookeeper.dir=/spark"
export SPARK_DAEMON_JAVA_OPTS
4、配置/opt/spark-2.2.0-bin-hadoop2.7/conf/slaves
datanode1
datanode2
datanode3
5、在namenode1、namenode2上启动spark集群
/opt/spark-2.2.0-bin-hadoop2.7/sbin/start-all.sh
6、两个master节点进程
7、通过web查看(master的web页面端口为上面配置的8090,例如 http://namenode1:8090)