Environment preparation
Install Java
yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel
# Add this line to the profile file on every machine
export JAVA_HOME=/usr/lib/jvm/java-openjdk
Three servers (since this is a learning environment, the three servers are not on the same internal network), configured as follows:
Public IP 119.29.186.83  internal IP 10.104.157.113
Public IP 119.29.250.47  internal IP 10.104.9.181
Public IP 119.29.251.99  internal IP 10.104.196.48
Software downloads:
Download URL: http://archive.cloudera.com/cdh5/cdh/5/
Files downloaded:
1.hadoop-2.6.0-cdh5.8.3.tar (the Hadoop package)
2.hive-1.1.0-cdh5.8.3.tar
3.zookeeper-3.4.5-cdh5.8.3.tar
4.scala-2.11.8.tar
5.kafka_2.11-0.10.2.0.tar
6.slf4j-1.7.9.zip
2. Edit the hosts file and configure passwordless login
On the 119.29.186.83 machine:
vim /etc/hosts
10.104.157.113 master
119.29.250.47 slave
119.29.251.99 slave1
Then use scp to copy it to the other machines:
scp /etc/hosts root@slave:/etc
scp /etc/hosts root@slave1:/etc
Because my servers are not on the same internal network (within a single internal network this step is unnecessary), log in to each slave machine and adjust the public and internal IPs in its hosts file so that they match.
The file on slave1 looks like this:
127.0.0.1 localhost localhost.localdomain VM_157_113_centos
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
119.29.186.83 master
10.104.9.181 slave1
119.29.251.99 slave
Passwordless login:
ssh-copy-id root@slave
ssh-copy-id root@slave1
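ssh-copy-id assumes the master already has an SSH key pair. If it does not, a minimal sketch for generating one first (default path and empty passphrase, acceptable only for a lab environment):
# run on master before the ssh-copy-id commands above
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa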
Configure the firewall (the commands below start firewalld and open the ports the cluster needs, rather than turning the firewall off):
systemctl start firewalld
firewall-cmd --permanent --zone=public --add-port=1-63000/tcp
firewall-cmd --permanent --zone=public --add-port=1-63000/udp
firewall-cmd --reload
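For a throwaway learning cluster, an alternative is simply to stop the firewall instead of opening a port range; a sketch (not advisable for anything exposed to the public internet):
systemctl stop firewalld
systemctl disable firewalld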
Hadoop installation
Extract the archive on your local machine (editing the files locally is easier than on the server):
- tar -zxvf hadoop-2.6.0-cdh5.8.3.tar.gz
- Rename the extracted directory to hadoop
- Enter the configuration directory: cd hadoop/etc/hadoop
- Edit core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- file system -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>
- Edit hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- number of replicas -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/usr/local/data/namenode</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/usr/local/data/datanode</value>
  </property>
  <property>
    <name>dfs.tmp.dir</name>
    <value>/usr/local/data/tmp</value>
  </property>
</configuration>
- Edit mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
- Edit yarn-site.xml
<?xml version="1.0"?>
<configuration>
  <!-- ResourceManager host -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <!-- shuffle service -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
- Edit the slaves file
slave
slave1
- Compress the hadoop directory into hadoop.zip
- Distribute the package and extract it on each node
scp hadoop.zip root@master:/usr/local
scp hadoop.zip root@slave:/usr/local
scp hadoop.zip root@slave1:/usr/local
- Log in to each machine, extract hadoop.zip, then remove the archive (master shown here; repeat on slave and slave1)
ssh root@master
cd /usr/local
unzip hadoop.zip
rm -rf hadoop.zip
- Create the /usr/local/data directory on every node
ssh root@master mkdir /usr/local/data
ssh root@slave mkdir /usr/local/data
ssh root@slave1 mkdir /usr/local/data
- Edit the environment variables
ssh root@master
vim ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin
source ~/.bashrc
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
ssh root@slave source ~/.bashrc
ssh root@slave1 source ~/.bashrc
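A quick sanity check that the new PATH entries took effect on each node (the version string below assumes the CDH 5.8.3 package downloaded earlier):
hadoop version   # should report Hadoop 2.6.0-cdh5.8.3
which hdfs       # should resolve to /usr/local/hadoop/bin/hdfs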
- Format the NameNode
ssh root@master hdfs namenode -format
- Start the HDFS and YARN clusters
start-dfs.sh
start-yarn.sh
- Visit master:50070 and master:8088 to view the HDFS and YARN web UIs respectively
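To verify that the daemons actually came up, a quick check with jps and the HDFS report (the process names are the standard Hadoop 2.x ones):
# on master: NameNode, SecondaryNameNode, ResourceManager
jps
# on slave and slave1: DataNode, NodeManager
ssh root@slave jps
ssh root@slave1 jps
# should list two live datanodes
hdfs dfsadmin -report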
Hive setup
- Upload the tar file
scp hive-1.1.0-cdh5.8.3.tar root@master:/usr/local
ssh root@master
cd /usr/local
tar -xvf hive-1.1.0-cdh5.8.3.tar
rm -rf hive-1.1.0-cdh5.8.3.tar
mv hive-1.1.0-cdh5.8.3 hive
- Edit ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin
- To install MySQL, see my other blog post: http://blog.csdn.net/qq_30259339/article/details/50466494
- Create the database
mysql> create database if not exists hive_metadata;
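Later, hive-site.xml connects from master to the MySQL on slave1 as admin/admin, so that account must exist and be allowed to connect remotely. A minimal sketch, assuming MySQL runs on slave1 and admin/admin are simply the example credentials used in this guide:
# run on slave1
mysql -u root -p -e "GRANT ALL PRIVILEGES ON hive_metadata.* TO 'admin'@'%' IDENTIFIED BY 'admin'; FLUSH PRIVILEGES;"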
- Copy the MySQL connector jar into hive's lib directory
scp mysql-connector-java-5.1.6.jar root@master:/usr/local/hive/lib
- Edit the configuration file hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in the metastore is compatible with the version from the Hive jars. Also disable automatic schema migration; users are required to manually migrate the schema after a Hive upgrade, which ensures proper metastore schema migration. (Default)
      False: Warn if the version information stored in the metastore doesn't match the version from the Hive jars.
    </description>
  </property>
  <!-- MySQL server address -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://slave1:3306/hive_metadata?createDatabaseIfNotExist=true</value>
    <description>jdbc</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>driver class</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>admin</value>
    <description>jdbc</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>admin</value>
    <description>jdbc</description>
  </property>
</configuration>
scp hive-site.xml root@master:/usr/local/hive/conf
- Edit hive-config.sh and rename hive-env.sh.template to hive-env.sh
mv hive-env.sh.template hive-env.sh
vim /usr/local/hive/bin/hive-config.sh
export HIVE_CONF_DIR=$HIVE_CONF_DIR
export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH
export HIVE_HOME=/usr/local/hive
export HADOOP_HOME=/usr/local/hadoop
# Default to use 256MB
export JAVA_HOME=/usr/lib/jvm/java-openjdk
Type hive on the command line and you will drop straight into the Hive shell.
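A quick smoke test that the metastore connection works (smoke_test is just a throwaway table name):
hive -e "create table smoke_test (id int); show tables; drop table smoke_test;"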
ZooKeeper cluster setup
- Extract zookeeper-3.4.5-cdh5.8.3.tar locally
- Rename the extracted directory to zk
- Enter the conf directory and rename the sample config: mv zoo_sample.cfg zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
# change dataDir
dataDir=/usr/local/zk/data
clientPort=2181
# the lines below are new
server.0=master:2888:3888
server.1=slave:2888:3888
server.2=slave1:2888:3888
- Compress the zk directory into zk.zip and upload it to the servers
scp zk.zip root@master:/usr/local
scp zk.zip root@slave:/usr/local
scp zk.zip root@slave1:/usr/local
Then unzip it on each server.
- Create the data directory and myid file on each server (the myid file lives inside the dataDir, /usr/local/zk/data)
ssh root@master
cd /usr/local/zk
mkdir data
vim data/myid   # file content: 0
ssh root@slave
cd /usr/local/zk
mkdir data
vim data/myid   # file content: 1
ssh root@slave1
cd /usr/local/zk
mkdir data
vim data/myid   # file content: 2
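Equivalently, the data directories and myid files can be created without opening an editor; the id written into each myid must match the server.N entries in zoo.cfg:
ssh root@master "mkdir -p /usr/local/zk/data && echo 0 > /usr/local/zk/data/myid"
ssh root@slave "mkdir -p /usr/local/zk/data && echo 1 > /usr/local/zk/data/myid"
ssh root@slave1 "mkdir -p /usr/local/zk/data && echo 2 > /usr/local/zk/data/myid"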
- Configure the environment variables
vim ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
source ~/.bashrc
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
- Start the ZooKeeper cluster
# run on each of the three machines
zkServer.sh start
# check ZooKeeper's status
zkServer.sh status
# jps should show a QuorumPeerMain process
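An optional connectivity check from any node, assuming $ZOOKEEPER_HOME/bin is on the PATH:
zkCli.sh -server master:2181
# inside the client shell, running "ls /" should return at least [zookeeper]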
Kafka cluster setup
- Install Scala
Download scala-2.11.8.tgz from http://www.scala-lang.org/download/2.11.8.html
scp scala-2.11.8.tgz root@master:/usr/local
scp scala-2.11.8.tgz root@slave:/usr/local
scp scala-2.11.8.tgz root@slave1:/usr/local
# perform the following steps on each server; master is used as the example here
ssh root@master
tar -zxvf scala-2.11.8.tgz
mv scala-2.11.8 scala
Edit the environment variables:
export JAVA_HOME=/usr/lib/jvm/java-openjdk
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
Distribute the file:
scp ~/.bashrc root@slave:~/
scp ~/.bashrc root@slave1:~/
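A quick check on each node that Scala is picked up from the new PATH:
scala -version   # should report Scala 2.11.8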
- Kafka setup
First extract kafka_2.11-0.10.2.0.tar locally:
tar -xvf kafka_2.11-0.10.2.0.tar
mv kafka_2.11-0.10.2.0 kafka
Configure Kafka by editing the server.properties file in the config directory:
# point zookeeper.connect at our zookeeper cluster
zookeeper.connect=master:2181,slave:2181,slave1:2181
# in the cluster, each machine's broker.id must be a distinct, increasing integer;
# this is master's config file, so broker.id=0 (slave can use broker.id=1)
broker.id=0
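Putting those edits together, the relevant lines of config/server.properties on master might look like the following; listeners and log.dirs are shown with example values (9092 is Kafka's default port) and can be adjusted:
broker.id=0
listeners=PLAINTEXT://master:9092
log.dirs=/tmp/kafka-logs
zookeeper.connect=master:2181,slave:2181,slave1:2181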
Install slf4j
unzip slf4j-1.7.9.zip
Copy slf4j-nop-1.7.9.jar into kafka's libs directory, then compress the kafka directory into kafka.zip and upload it to each server:
scp kafka.zip root@master:/usr/local
scp kafka.zip root@slave:/usr/local
scp kafka.zip root@slave1:/usr/local
# unzip on each server
unzip kafka.zip
On slave and slave1, edit the broker.id in the config/server.properties file:
# The id of the broker. This must be set to a unique integer for each broker.
# on slave:
broker.id=1
# on slave1:
broker.id=2
Fix the Kafka "Unrecognized VM option 'UseCompressedOops'" error:
cd kafka
vim bin/kafka-run-class.sh
# remove the -XX:+UseCompressedOops option
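The same edit can be applied on all three nodes without opening an editor; a sketch assuming kafka was unzipped to /usr/local/kafka on each machine:
for h in master slave slave1; do
  ssh root@$h "sed -i 's/-XX:+UseCompressedOops//g' /usr/local/kafka/bin/kafka-run-class.sh"
done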
Start Kafka: on each of the three machines, cd into the kafka directory and run:
nohup bin/kafka-server-start.sh config/server.properties &
cat nohup.out shows whether the broker started successfully.
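An optional end-to-end smoke test, run from the kafka directory (test_topic is a throwaway name; 9092 assumes the default broker port):
bin/kafka-topics.sh --create --zookeeper master:2181,slave:2181,slave1:2181 --replication-factor 2 --partitions 1 --topic test_topic
bin/kafka-topics.sh --list --zookeeper master:2181
# in one terminal, type a few messages:
bin/kafka-console-producer.sh --broker-list master:9092,slave:9092 --topic test_topic
# in another terminal, they should appear here:
bin/kafka-console-consumer.sh --bootstrap-server master:9092 --topic test_topic --from-beginning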