Hadoop 2.6.0集群搭建

时间:2024-01-10 14:35:03

yum install gcc

yum install gcc-c++

yum install make

yum install autoconf automake libtool cmake

yum install ncurses-devel

yum install openssl-devel

groupadd hadoop  添加一个组

useradd hadoop -g hadoop  添加用户

安装protoc(需用root用户)

1 tar -xvf protobuf-2.5.0.tar.bz2

2 cd protobuf-2.5.0

3 ./configure --prefix=/opt/protoc/

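4 make && make install

5 export PATH=/opt/protoc/bin:$PATH  (将protoc加入PATH,否则编译hadoop时会找不到protoc;可执行 protoc --version 确认输出为 libprotoc 2.5.0)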

编译hadoop

mvn clean package -Pdist,native -DskipTests -Dtar

编译完的hadoop在 /home/hadoop/ocdc/hadoop-2.6.0-src/hadoop-dist/target 路径下

配置hosts文件

10.1.245.244 master

10.1.245.243 slave1

命令行输入 hostname master

免密码登录:

执行命令生成密钥: ssh-keygen -t rsa -P ""

进入文件夹cd  .ssh (进入文件夹后可以执行ls  -a 查看文件)

将生成的公钥id_rsa.pub 内容追加到authorized_keys(执行命令:cat id_rsa.pub >> authorized_keys)

---------------------------(core-site.xml)------------------------------

<configuration>

  <!-- Default file system URI: HDFS served by the NameNode on host "master". -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master</value>
  </property>

  <!-- Buffer size, in bytes, used for file I/O (131072 = 128 KiB). -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>

  <!-- Directory where Hadoop stores its data. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/ocdc/hadoop-2.6.0/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>

  <!-- Proxy-user settings for user "spark": "*" places no restriction on
       the hosts it may connect from or the groups it may impersonate. -->
  <property>
    <name>hadoop.proxyuser.spark.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.spark.groups</name>
    <value>*</value>
  </property>

  <!-- ZooKeeper quorum addresses. This property used to sit after the
       closing root tag, which made the file malformed; it now lives inside
       the single root element.
       NOTE(review): hosts h4, h5 and h6 are not among the hosts entries
       listed in this guide (master, slave1). Confirm they resolve, or drop
       this property if HA automatic failover is not used. -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>h4:2181,h5:2181,h6:2181</value>
  </property>

</configuration>

-------------------------------(hdfs-site.xml )-----------------------------------

<configuration>

  <!-- HTTP address of the secondary NameNode. -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>master:9001</value>
  </property>

  <!-- Local directories for NameNode metadata and DataNode blocks. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop/ocdc/hadoop-2.6.0/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop/ocdc/hadoop-2.6.0/data</value>
  </property>

  <!-- Number of replicas kept for each block. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- Enable the WebHDFS REST interface. -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>

  <!-- Logical nameservice "ns1" with two NameNodes, nn1 and nn2.
       NOTE(review): no per-NameNode RPC/HTTP addresses for nn1 and nn2 are
       visible in this snippet; confirm whether these two HA keys are
       intended for this cluster. -->
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>

</configuration>

----------------------------------------(yarn-site.xml)--------------------------

<configuration>

  <!-- Site specific YARN configuration properties -->

  <!-- ResourceManager host. This property used to sit after the closing
       root tag with the value "h3", which made the file malformed and
       disagreed with every explicit ResourceManager address below. It is
       now inside the root and set to "master", the only ResourceManager
       host named by the hosts entries in this guide. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>

  <!-- Auxiliary service the NodeManager loads at startup: the MapReduce
       shuffle server. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- The class key must embed the service name exactly as declared above
       ("mapreduce_shuffle", with an underscore). -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <!-- ResourceManager endpoints, all on host "master". -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:8035</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:8088</value>
  </property>

  <!-- Memory, in MB, that each NodeManager offers to containers. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>16384</value>
  </property>

</configuration>

-----------------------------(mapred-site.xml)-------------------------------

<configuration>

  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory server: IPC endpoint and web UI, both on host "master". -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
  </property>

  <!-- NOTE(review): this is a YARN scheduler key; it is normally set in
       yarn-site.xml. Confirm the ResourceManager actually picks it up from
       this file. -->
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>16384</value>
  </property>

</configuration>