hadoop2.4.0 安装配置 (2)

时间:2022-03-26 02:32:57

hdfs-site.xml 配置如下:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
--> <!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>Master:9001</value>
</property> <property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property> <property>
<name>dfs.replication</name>
<!-- dfs.replication -它决定着 系统里面的文件块的数据备份个数。 对于一个实际的应用,它应该被设为3(这个 数字并没有上限,但更多的备份可能并没有作用,而且会占用更多的空间)。 少于三个的备份,可能会影响到数据的 可靠性(系统故障时,也许会造成数据丢失) -->
<value>1</value>
</property>
<property>
<name>hadoop.tmp.dir</name> <value>/usr/local/hadoop-2.4.0/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>dfs.name.dir</name>
<!-- dfs.name.dir - 这是NameNode结点存储hadoop文件系统信息的本地系统路径。 这个值只对NameNode有效,DataNode并不需要使用到它。 上面对于/temp类型的警告,同样也适用于这里。在实际应用中,它最好被覆盖掉。 -->
<value>/usr/local/hadoop-2.4.0/dfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<!-- dfs.data.dir - 这是DataNode结点被指定要存储数据的本地文件系统路径。 DataNode结点上 的这个路径没有必要完全相同,因为每台机器的环境很可能是不一样的。 但如果每台机器上的这 个路径都是统一配置的话,会使工作变得简单一些。 默认的情况下,它的值hadoop.tmp.dir, 这 个路径只能用于测试的目的,因为,它很可能会丢失掉一些数据。所以,这个值最好还是被覆 盖。 -->
<value>/usr/local/hadoop-2.4.0/dfs/data</value>
</property>
<!-- 解决:org.apache.hadoop.security.AccessControlException:Permission denied:user=Administrator,access=WRITE,inode="tmp":root:supergroup:rwxr-xr-x。因为Eclipse使用hadoop插件提交作业时,会默认以 DrWho身份去将作业写入hdfs文件系统中,对应的也就是 HDFS 上的/user/hadoop , 由于 DrWho用户对hadoop目录并没有写入权限,所以导致异常的发生。解决方法为:放开 hadoop 目录的权限, 命令如下 :$ hadoop fs -chmod 777 /user/hadoop -->
<property>
<name>dfs.permissions</name>
<value>false</value>
<description>If "true", enable permission checking in HDFS. If "false", permission checking is turned off, but all other behavior is unchanged. Switching from one parameter value to the other does not change the mode, owner or group of files or directories</description>
</property> </configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
--> <!-- Put site-specific property overrides in this file. -->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<!-- fs.default.name -这是一个描述集群中NameNode结点的URI(包括协议、主机名称、端口号),集群里面的每一台机器都需要知道NameNode的地址。DataNode结点会先在NameNode上注册,这样它们的数据才可以被使用。独立的客户端程序通过这个URI跟DataNode交互,以取得文件的块列表。-->
<value>hdfs://Master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<!-- hadoop.tmp.dir 是hadoop文件系统依赖的基础配置,很多路径都依赖它。如果hdfs-site.xml中不配 置namenode和datanode的存放位置,默认就放在这个路径中Hadoop的默认临时路径,这个最好配置,然后在新增节点或者其他情况下莫名其妙的DataNode启动不了,就删除此文件中的tmp目录即可。 不过如果删除了NameNode机器的此目录,那么就需要重新执行NameNode格式化的命令了。 -->
<value>/usr/local/hadoop-2.4.0/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!-- 添加httpfs的选项 -->
<property>
<name>hadoop.proxyuser.hduser.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hduser.groups</name>
<value>*</value>
</property> <property>
<name>hadoop.native.lib</name>
<value>true</value>
<description>Should native hadoop libraries, if present, be used.</description>
</property>
</configuration>

yarn-site.xml

<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--
<property>
<name>mapreduce.jobhistory.address</name>
<value>Master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>Master:19888</value>
</property> -->
</configuration>
<?xml version="1.0" encoding="utf-8"?>

<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
--> <configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>Master</value>
</property> <property>
<name>yarn.resourcemanager.hostname</name>
<value>0.0.0.0</value>
</property>
</configuration>

yarn.nodemanager.hostname如果配置成具体的IP,如10.12.154.79,则会导致每个NamoManager的配置不同。详细配置可参考:

http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml