YARN label 特性 & 指定队列及label提交任务

时间:2024-09-23 09:04:26

以下基于 hadoop版本 hadoop-2.8.4

给各个节点打标签

# Add the fastcpu and normal labels to the cluster.
yarn rmadmin -addToClusterNodeLabels fastcpu,normal
# Exclusivity defaults to true; with exclusive=false other queues can also use
# the resources of the normal label.
# NOTE(review): this command adds the same two labels as the one above;
# presumably only one of the two is meant to be run - confirm.
yarn rmadmin -addToClusterNodeLabels "fastcpu,normal(exclusive=false)"
# Attach the fastcpu label to node container-192-168-200-140.
yarn rmadmin -replaceLabelsOnNode "container-192-168-200-140,fastcpu"
# container-192-168-200-141 is left without a label; this command may be skipped.
yarn rmadmin -replaceLabelsOnNode "container-192-168-200-141, "
# Attach the normal label to node container-192-168-200-142.
yarn rmadmin -replaceLabelsOnNode "container-192-168-200-142,normal"
1.spark任务可以通过spark-submit方式指定参数spark.yarn.am.nodeLabelExpression和spark.yarn.executor.nodeLabelExpression来实现,使任务提交到特定的分区。
# Submit the SparkPi example to YARN in cluster mode, requesting the fastcpu
# node label for both the application master and the executors.
# Each line ends with a backslash so the shell treats this as ONE command.
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master yarn --deploy-mode cluster --driver-memory 3g --executor-memory 2g \
  --conf spark.yarn.am.nodeLabelExpression=fastcpu \
  --conf spark.yarn.executor.nodeLabelExpression=fastcpu \
  jars/spark-examples.jar 10
2.hadoop distributed shell / hadoop job 也可以指定队列和label提交。
# Launch 5 distributed-shell containers running "sleep 100000" in queue
# etl-ywpt, with node label expression fastcpu.
hadoop jar ./share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.8.4.jar \
  -shell_command "sleep 100000" \
  -jar ./share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.8.4.jar \
  -num_containers 5 \
  -queue etl-ywpt \
  -node_label_expression fastcpu
mr example wordcount:
# Run the wordcount example in queue etl-ywpt with node label expression
# fastcpu; the last two arguments are the input file and the output directory.
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.4.jar wordcount \
  -D mapreduce.job.node-label-expression="fastcpu" \
  -D mapreduce.job.queuename="etl-ywpt" \
  /hadoop-hadoop-namenode-container-192-168-200-140.log \
  /output
3.另外还可以通过yarn代码的方式提交;hive可以指定如下参数 提交指定队列及label。
-- Node label expression for the jobs started from this Hive session.
set mapreduce.job.node-label-expression=fastcpu;
-- Queue the jobs are submitted to.
set mapreduce.job.queuename=etl-bi;

yarn web ui

yarn 8088调度页面

YARN label 特性 & 指定队列及label提交任务

node 的 label 页面

YARN label 特性 & 指定队列及label提交任务


只有容量调度器(Capacity Scheduler)才能使用label标签,公平调度器(Fair Scheduler)暂时不支持标签。
配置文件 capacity-scheduler.xml
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Scheduler-wide settings. -->
  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>
  <!-- Child queues of root. Their capacity values below add up to 100
       (10 + 5 + 3 + 52 + 10 + 10 + 10). -->
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default,etl-ywpt,etl-bi,etl-ly,etl-test,hue,bi</value>
  </property>
  <!--
  <property>
    <name>yarn.scheduler.capacity.root.acl_submit_applications</name>
    <value> </value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.acl_administer_queue</name>
    <value>hadoop </value>
  </property>
  -->
  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.2</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
  </property>
  <!--
  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>
  -->

  <!-- Two node labels are configured: normal and fastcpu.
       Per label, the child-queue capacities below add up to 100:
       normal  = 10 + 10 + 20 + 10 + 10 + 40
       fastcpu = 50 + 50 -->
  <!-- root queue: labels it can access and its share of each label -->
  <property>
    <name>yarn.scheduler.capacity.root.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels.normal.capacity</name>
    <value>100</value>
    <description>root队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.accessible-node-labels.fastcpu.capacity</name>
    <value>100</value>
    <description>root队列对fastcpu标签节点可用的百分比</description>
  </property>

  <!-- default queue (normal label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.default.accessible-node-labels</name>
    <value>normal</value>
    <description>default队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.default-node-label-expression</name>
    <value>normal</value>
    <description>default队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.accessible-node-labels.normal.capacity</name>
    <value>10</value>
    <description>default队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.accessible-node-labels.normal.maximum-capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>10</value>
    <description>Default queue target capacity.</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>20</value>
    <description>
      The maximum capacity of the default queue.
    </description>
  </property>

  <!-- etl-ywpt queue (all labels accessible, normal by default) -->
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.accessible-node-labels</name>
    <value>*</value>
    <description>etl-ywpt队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.default-node-label-expression</name>
    <value>normal</value>
    <description>etl-ywpt队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.accessible-node-labels.normal.capacity</name>
    <value>10</value>
    <description>etl-ywpt队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.accessible-node-labels.normal.maximum-capacity</name>
    <value>20</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.accessible-node-labels.fastcpu.capacity</name>
    <value>50</value>
    <description>etl-ywpt队列对fastcpu标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.accessible-node-labels.fastcpu.maximum-capacity</name>
    <value>60</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.capacity</name>
    <value>5</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ywpt.maximum-capacity</name>
    <value>30</value>
  </property>

  <!-- etl-bi queue (fastcpu label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.accessible-node-labels</name>
    <value>fastcpu</value>
    <description>etl-bi队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.default-node-label-expression</name>
    <value>fastcpu</value>
    <description>etl-bi队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.accessible-node-labels.fastcpu.capacity</name>
    <value>50</value>
    <description>etl-bi队列对fastcpu标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.accessible-node-labels.fastcpu.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.capacity</name>
    <value>3</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-bi.maximum-capacity</name>
    <value>20</value>
  </property>

  <!-- etl-ly queue (normal label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.accessible-node-labels</name>
    <value>normal</value>
    <description>etl-ly队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.default-node-label-expression</name>
    <value>normal</value>
    <description>etl-ly队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.accessible-node-labels.normal.capacity</name>
    <value>20</value>
    <description>etl-ly队列对normal标签节点可用的百分比</description>
  </property>
  <!-- This queue can only access the normal label, so the maximum is set on
       normal (the key previously named fastcpu, which etl-ly cannot access). -->
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.accessible-node-labels.normal.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.capacity</name>
    <value>52</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-ly.maximum-capacity</name>
    <value>55</value>
  </property>

  <!-- etl-test queue (normal label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.accessible-node-labels</name>
    <value>normal</value>
    <description>etl-test队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.default-node-label-expression</name>
    <value>normal</value>
    <description>etl-test队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.accessible-node-labels.normal.capacity</name>
    <value>10</value>
    <description>etl-test队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.accessible-node-labels.normal.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.capacity</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl-test.maximum-capacity</name>
    <value>50</value>
  </property>

  <!-- hue queue (normal label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.hue.accessible-node-labels</name>
    <value>normal</value>
    <description>hue队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.hue.default-node-label-expression</name>
    <value>normal</value>
    <description>hue队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.hue.accessible-node-labels.normal.capacity</name>
    <value>10</value>
    <description>hue队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.hue.accessible-node-labels.normal.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.hue.capacity</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.hue.maximum-capacity</name>
    <value>50</value>
  </property>

  <!-- bi queue (normal label only) -->
  <property>
    <name>yarn.scheduler.capacity.root.bi.accessible-node-labels</name>
    <value>normal</value>
    <description>bi队列应用可用的节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.bi.default-node-label-expression</name>
    <value>normal</value>
    <description>bi队列应用默认节点标签</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.bi.accessible-node-labels.normal.capacity</name>
    <value>40</value>
    <description>bi队列对normal标签节点可用的百分比</description>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.bi.accessible-node-labels.normal.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.bi.capacity</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.bi.maximum-capacity</name>
    <value>20</value>
  </property>
</configuration>