Setting up pseudo-distributed Hadoop-2.2.0 and hbase-0.96.0-hadoop2
1. Deployment
(1) Hostname
sudo gedit /etc/hostname
master1
sudo gedit /etc/hosts
127.0.0.1 master1
(2) SSH
sudo apt-get install ssh
mkdir /home/hadoop/.ssh
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh localhost
ssh master1
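If passwordless login still prompts for a password, the usual cause is overly open permissions on the key files; a quick fix, assuming the hadoop user's home directory:
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys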
(3) Environment variables
export ANT_HOME=/opt/apache-ant-1.9.3
export MAVEN_HOME=/opt/apache-maven-3.0.5
export FINDBUGS_HOME=/opt/findbugs-2.0.2
export PATH=${ANT_HOME}/bin:${MAVEN_HOME}/bin:${FINDBUGS_HOME}/bin:$PATH
#jdk
export JAVA_HOME=/opt/jdk1.7.0_45
export JRE_HOME=/opt/jdk1.7.0_45/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
#hadoop
export HADOOP_HOME=/opt/hadoop-2.2.0
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
#hbase
export HBASE_HOME=/opt/hbase-0.96.0-hadoop2
export PATH=$PATH:$HBASE_HOME/bin
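These exports are typically appended to the hadoop user's ~/.bashrc (or /etc/profile for all users) and reloaded before continuing; a minimal check, assuming ~/.bashrc:
source ~/.bashrc
java -version
hadoop version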
(4) Install locations
/opt/jdk1.7.0_45
/opt/hadoop-2.2.0
/opt/hbase-0.96.0-hadoop2
(5) Create local directories (to match the config files below)
mkdir -p /data/tmp_hadoop /data/hdfs/nn /data/hdfs/dn /data/tmp_hbase /data/yarn/local /data/yarn/logs /data/log/hadoop-hdfs /data/log/hadoop-yarn /data/log/hadoop-mapred
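Since the daemons run as the hadoop user, that user must also own these directories; assuming a hadoop user and group:
sudo chown -R hadoop:hadoop /data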
2. Hadoop-2.2.0 configuration
cd /opt/hadoop-2.2.0/etc/hadoop
(1) core-site.xml
<!-- global properties -->
<property>
<name>hadoop.tmp.dir</name>
<value>/data/tmp_hadoop</value>
<!-- default: /tmp/hadoop-${user.name} -->
<!-- a base for other temporary directories -->
</property>
<!-- file system properties -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
<!-- 24 hours; 0 disables the trash -->
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>1440</value>
<!-- normally less than or equal to fs.trash.interval -->
</property>
<!-- i/o properties -->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
<!-- read/write buffer size for sequence files -->
</property>
<!-- HDFS block size -->
<property>
<name>dfs.blocksize</name>
<value>67108864</value>
<!-- 64MB; use 268435456 for 256MB -->
</property>
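Note that in this and every XML config file below, the <property> elements must sit inside the file's <configuration> root element:
<?xml version="1.0"?>
<configuration>
<!-- property elements go here -->
</configuration>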
(2) hadoop-env.sh
export JAVA_HOME=/opt/jdk1.7.0_45
export HADOOP_LOG_DIR=/data/log/hadoop-hdfs
export YARN_LOG_DIR=/data/log/hadoop-yarn
export HADOOP_MAPRED_LOG_DIR=/data/log/hadoop-mapred
(3) hdfs-site.xml
<!-- local directories that need to be configured -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hdfs/nn</value>
<!-- default: file://${hadoop.tmp.dir}/dfs/name -->
<!-- where the NameNode stores the namespace and transaction (edit) logs -->
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/hdfs/dn</value>
<!-- default: file://${hadoop.tmp.dir}/dfs/data -->
<!-- local path where the DataNode stores its blocks -->
</property>
<!-- must be set for the SecondaryNameNode, even on a single node -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>localhost:9001</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
<!-- replication factor; normally 3 in production -->
</property>
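After editing, you can confirm that Hadoop picks up a value with hdfs getconf, e.g.:
hdfs getconf -confKey dfs.replication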
(4) mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<!-- set the runtime framework to Hadoop YARN -->
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>13562</value>
</property>
<!--JHS-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>localhost:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>localhost:19888</value>
</property>
(5) masters
localhost
(6) slaves
localhost
(7) yarn-env.sh
export JAVA_HOME=/opt/jdk1.7.0_45
(8) yarn-site.xml
<!-- Resource Manager Configs -->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>localhost:8088</value>
<!-- RM web UI -->
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>localhost:8033</value>
<!-- RM admin interface -->
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:8032</value>
<!-- RM address where clients submit jobs -->
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:8030</value>
<!-- scheduler interface -->
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:8031</value>
</property>
<!--NodeManager-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
(9) Start Hadoop
sudo chown -R hadoop:hadoop hadoop-2.2.0/
hdfs namenode -format
start-dfs.sh
Run jps; you should see:
NameNode
DataNode
SecondaryNameNode
localhost:50070
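Optionally, run a quick HDFS smoke test (the paths here are arbitrary examples):
hadoop fs -mkdir -p /user/hadoop
hadoop fs -put /etc/hosts /user/hadoop/
hadoop fs -ls /user/hadoop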
start-yarn.sh
Run jps; you should see:
ResourceManager
NodeManager
localhost:8088
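To confirm the NodeManager registered with the ResourceManager:
yarn node -list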
mr-jobhistory-daemon.sh start historyserver
Run jps; you should see:
JobHistoryServer
localhost:19888
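As an end-to-end check, run a small job from the examples jar shipped with 2.2.0:
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 10
If it prints an estimate of pi, HDFS and YARN are working end to end.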
3. HBase-0.96.0-hadoop2 configuration
(1) hbase-env.sh
export JAVA_HOME=/opt/jdk1.7.0_45
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
export HBASE_MANAGES_ZK=true
export HBASE_CLASSPATH=/opt/hadoop-2.2.0/etc/hadoop
export HBASE_LOG_DIR=/data/hbase/logs
(2) hbase-site.xml
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/data/tmp_hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/data/hbase/zookeeper</value>
<!-- snapshot directory for the bundled ZooKeeper -->
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>localhost</value>
</property>
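hbase-env.sh and hbase-site.xml above reference /data/hbase/logs and /data/hbase/zookeeper, which step 1(5) did not create, so create them before starting:
mkdir -p /data/hbase/logs /data/hbase/zookeeper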
(3) regionservers
localhost
(4) Start HBase
sudo chown -R hadoop:hadoop hbase-0.96.0-hadoop2/
start-hbase.sh
Run jps; you should see:
HQuorumPeer
HRegionServer
HMaster
Visit: localhost:60010
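A quick smoke test in the HBase shell (the table and column family names are arbitrary examples):
hbase shell
create 'test', 'cf'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
exit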