1. Overview
Before Hadoop 2.0.0, a cluster had only a single NameNode, which was a single point of failure: if the NameNode host went down, the whole cluster was unusable until the NameNode was restarted, and even planned maintenance meant taking the entire cluster offline, so 24/7 availability was impossible. Hadoop 2.0 and later therefore added a NameNode high-availability (HA) mechanism; this article focuses on deploying Hadoop HA on Kubernetes.
For a non-HA Kubernetes deployment, see my earlier article 《入门 Hadoop 在 K8S 环境中部署》 (Getting Started: Deploying Hadoop on K8s).
(HDFS HA and YARN HA architecture diagrams)
2. Deployment
The deployment here is adapted from the non-HA orchestration.
1) Add the JournalNode orchestration
1. StatefulSet controller
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ include "hadoop.fullname" . }}-hdfs-jn
annotations:
checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }}
labels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
helm.sh/chart: {{ include "hadoop.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
serviceName: {{ include "hadoop.fullname" . }}-hdfs-jn
replicas: {{ .Values.hdfs.jounralNode.replicas }}
template:
metadata:
labels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
spec:
affinity:
podAntiAffinity:
{{- if eq .Values.antiAffinity "hard" }}
requiredDuringSchedulingIgnoredDuringExecution:
- topologyKey: "kubernetes.io/hostname"
labelSelector:
matchLabels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
{{- else if eq .Values.antiAffinity "soft" }}
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 5
podAffinityTerm:
topologyKey: "kubernetes.io/hostname"
labelSelector:
matchLabels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
{{- end }}
terminationGracePeriodSeconds: 0
containers:
- name: hdfs-jn
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
command:
- "/bin/bash"
- "/opt/apache/tmp/hadoop-config/bootstrap.sh"
- "-d"
resources:
{{ toYaml .Values.hdfs.jounralNode.resources | indent 10 }}
readinessProbe:
tcpSocket:
port: 8485
initialDelaySeconds: 10
timeoutSeconds: 2
livenessProbe:
tcpSocket:
port: 8485
initialDelaySeconds: 10
timeoutSeconds: 2
volumeMounts:
- name: hadoop-config
mountPath: /opt/apache/tmp/hadoop-config
{{- range .Values.persistence.journalNode.volumes }}
- name: {{ .name }}
mountPath: {{ .mountPath }}
{{- end }}
securityContext:
runAsUser: {{ .Values.securityContext.runAsUser }}
privileged: {{ .Values.securityContext.privileged }}
volumes:
- name: hadoop-config
configMap:
name: {{ include "hadoop.fullname" . }}
{{- if .Values.persistence.journalNode.enabled }}
volumeClaimTemplates:
{{- range .Values.persistence.journalNode.volumes }}
- metadata:
name: {{ .name }}
labels:
app.kubernetes.io/name: {{ include "hadoop.name" $ }}
helm.sh/chart: {{ include "hadoop.chart" $ }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: hdfs-jn
spec:
accessModes:
- {{ $.Values.persistence.journalNode.accessMode | quote }}
resources:
requests:
storage: {{ $.Values.persistence.journalNode.size | quote }}
{{- if $.Values.persistence.journalNode.storageClass }}
{{- if (eq "-" $.Values.persistence.journalNode.storageClass) }}
storageClassName: ""
{{- else }}
storageClassName: "{{ $.Values.persistence.journalNode.storageClass }}"
{{- end }}
{{- end }}
{{- end }}
{{- else }}
- name: dfs
emptyDir: {}
{{- end }}
2. Service
# A headless service to create DNS records
apiVersion: v1
kind: Service
metadata:
name: {{ include "hadoop.fullname" . }}-hdfs-jn
labels:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
helm.sh/chart: {{ include "hadoop.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
spec:
ports:
- name: jn
port: {{ .Values.service.journalNode.ports.jn }}
protocol: TCP
{{- if and (eq .Values.service.journalNode.type "NodePort") .Values.service.journalNode.nodePorts.jn }}
nodePort: {{ .Values.service.journalNode.nodePorts.jn }}
{{- end }}
type: {{ .Values.service.journalNode.type }}
selector:
app.kubernetes.io/name: {{ include "hadoop.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: hdfs-jn
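Once deployed, it is easy to confirm that all three JournalNodes are up and listening on port 8485. A quick check, assuming the release is named hadoop-ha in namespace hadoop-ha as in the install step below:

# list the JournalNode pods created by the StatefulSet above
kubectl get pods -n hadoop-ha -l app.kubernetes.io/component=hdfs-jn -o wide
# the readiness/liveness probes above already TCP-check port 8485; the same port
# is what the qjournal:// shared-edits URI in hdfs-site.xml points at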
2) Modify the configuration
1. Modify values.yaml
image:
repository: myharbor.com/bigdata/hadoop
tag: 3.3.2
pullPolicy: IfNotPresent
# The version of the hadoop libraries being used in the image.
hadoopVersion: 3.3.2
logLevel: INFO
# Select antiAffinity as either hard or soft, default is soft
antiAffinity: "soft"
hdfs:
nameNode:
replicas: 2
pdbMinAvailable: 1
resources:
requests:
memory: "256Mi"
cpu: "10m"
limits:
memory: "2048Mi"
cpu: "1000m"
dataNode:
# Will be used as dfs.datanode.hostname
# You still need to set up services + ingress for every DN
# Datanodes will expect to
externalHostname: example.com
externalDataPortRangeStart: 9866
externalHTTPPortRangeStart: 9864
replicas: 3
pdbMinAvailable: 1
resources:
requests:
memory: "256Mi"
cpu: "10m"
limits:
memory: "2048Mi"
cpu: "1000m"
webhdfs:
enabled: true
jounralNode:
replicas: 3
pdbMinAvailable: 1
resources:
requests:
memory: "256Mi"
cpu: "10m"
limits:
memory: "2048Mi"
cpu: "1000m"
yarn:
resourceManager:
pdbMinAvailable: 1
replicas: 2
resources:
requests:
memory: "256Mi"
cpu: "10m"
limits:
memory: "2048Mi"
cpu: "2000m"
nodeManager:
pdbMinAvailable: 1
# The number of YARN NodeManager instances.
replicas: 1
# Create statefulsets in parallel (K8S 1.7+)
parallelCreate: false
# CPU and memory resources allocated to each node manager pod.
# This should be tuned to fit your workload.
resources:
requests:
memory: "256Mi"
cpu: "500m"
limits:
memory: "2048Mi"
cpu: "1000m"
persistence:
nameNode:
enabled: true
storageClass: "hadoop-ha-nn-local-storage"
accessMode: ReadWriteOnce
size: 1Gi
local:
- name: hadoop-ha-nn-0
host: "local-168-182-110"
path: "/opt/bigdata/servers/hadoop-ha/nn/data/data1"
- name: hadoop-ha-nn-1
host: "local-168-182-111"
path: "/opt/bigdata/servers/hadoop-ha/nn/data/data1"
dataNode:
enabled: true
enabledStorageClass: false
storageClass: "hadoop-ha-dn-local-storage"
accessMode: ReadWriteOnce
size: 1Gi
local:
- name: hadoop-ha-dn-0
host: "local-168-182-110"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data1"
- name: hadoop-ha-dn-1
host: "local-168-182-110"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data2"
- name: hadoop-ha-dn-2
host: "local-168-182-110"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data3"
- name: hadoop-ha-dn-3
host: "local-168-182-111"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data1"
- name: hadoop-ha-dn-4
host: "local-168-182-111"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data2"
- name: hadoop-ha-dn-5
host: "local-168-182-111"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data3"
- name: hadoop-ha-dn-6
host: "local-168-182-112"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data1"
- name: hadoop-ha-dn-7
host: "local-168-182-112"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data2"
- name: hadoop-ha-dn-8
host: "local-168-182-112"
path: "/opt/bigdata/servers/hadoop-ha/dn/data/data3"
volumes:
- name: dfs1
mountPath: /opt/apache/hdfs/datanode1
hostPath: /opt/bigdata/servers/hadoop-ha/dn/data/data1
- name: dfs2
mountPath: /opt/apache/hdfs/datanode2
hostPath: /opt/bigdata/servers/hadoop-ha/dn/data/data2
- name: dfs3
mountPath: /opt/apache/hdfs/datanode3
hostPath: /opt/bigdata/servers/hadoop-ha/dn/data/data3
journalNode:
enabled: true
storageClass: "hadoop-ha-jn-local-storage"
accessMode: ReadWriteOnce
size: 1Gi
local:
- name: hadoop-ha-jn-0
host: "local-168-182-110"
path: "/opt/bigdata/servers/hadoop-ha/jn/data/data1"
- name: hadoop-ha-jn-1
host: "local-168-182-111"
path: "/opt/bigdata/servers/hadoop-ha/jn/data/data1"
- name: hadoop-ha-jn-2
host: "local-168-182-112"
path: "/opt/bigdata/servers/hadoop-ha/jn/data/data1"
volumes:
- name: jn
mountPath: /opt/apache/hdfs/journalnode
service:
nameNode:
type: NodePort
ports:
dfs: 9000
webhdfs: 9870
nodePorts:
dfs: 30900
webhdfs: 30870
dataNode:
type: NodePort
ports:
webhdfs: 9864
nodePorts:
webhdfs: 30864
resourceManager:
type: NodePort
ports:
web: 8088
nodePorts:
web: 30088
journalNode:
type: ClusterIP
ports:
jn: 8485
nodePorts:
jn: ""
securityContext:
runAsUser: 9999
privileged: true
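After editing values.yaml, it is worth rendering the chart locally to confirm the new JournalNode section is actually picked up before installing anything. An optional check, assuming the chart directory is ./hadoop as in the install step below:

# static chart checks
helm lint ./hadoop
# render the manifests locally and confirm the hdfs-jn StatefulSet and Service are emitted
helm template hadoop-ha ./hadoop -n hadoop-ha | grep "hdfs-jn" | sort -u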
2. Modify hadoop/templates/hadoop-configmap.yaml
The changes to this file are fairly extensive, so they are not reproduced here; the full version is available from the git download link at the end of the article.
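For orientation, the heart of those changes is switching hdfs-site.xml and core-site.xml (and the YARN equivalents) to HA mode. The fragment below is only a sketch of the usual QJM-based HA properties, not the chart's actual ConfigMap: the nameservice ID (myha), the host names and the ZooKeeper quorum are placeholders that must match whatever the bootstrap script and values.yaml really generate.

data:
  hdfs-site.xml: |
    <configuration>
      <!-- one logical nameservice backed by two NameNodes (IDs are placeholders) -->
      <property><name>dfs.nameservices</name><value>myha</value></property>
      <property><name>dfs.ha.namenodes.myha</name><value>nn1,nn2</value></property>
      <property><name>dfs.namenode.rpc-address.myha.nn1</name><value>hadoop-ha-hadoop-hdfs-nn-0.hadoop-ha-hadoop-hdfs-nn:9000</value></property>
      <property><name>dfs.namenode.rpc-address.myha.nn2</name><value>hadoop-ha-hadoop-hdfs-nn-1.hadoop-ha-hadoop-hdfs-nn:9000</value></property>
      <!-- shared edit log on the JournalNode quorum deployed above (port 8485) -->
      <property><name>dfs.namenode.shared.edits.dir</name><value>qjournal://hadoop-ha-hadoop-hdfs-jn-0.hadoop-ha-hadoop-hdfs-jn:8485;hadoop-ha-hadoop-hdfs-jn-1.hadoop-ha-hadoop-hdfs-jn:8485;hadoop-ha-hadoop-hdfs-jn-2.hadoop-ha-hadoop-hdfs-jn:8485/myha</value></property>
      <!-- client-side failover and automatic failover via ZKFC -->
      <property><name>dfs.client.failover.proxy.provider.myha</name><value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value></property>
      <property><name>dfs.ha.automatic-failover.enabled</name><value>true</value></property>
      <property><name>dfs.ha.fencing.methods</name><value>shell(/bin/true)</value></property>
    </configuration>
  core-site.xml: |
    <configuration>
      <!-- clients address the nameservice rather than a single NameNode -->
      <property><name>fs.defaultFS</name><value>hdfs://myha</value></property>
      <!-- ZooKeeper ensemble used by ZKFC for automatic failover (placeholder) -->
      <property><name>ha.zookeeper.quorum</name><value>zk-0:2181,zk-1:2181,zk-2:2181</value></property>
    </configuration>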
3) Install
# create the local storage directories on each node backing the local PVs
mkdir -p /opt/bigdata/servers/hadoop-ha/{nn,dn,jn}/data/data{1..3}
chmod -R 777 /opt/bigdata/servers/hadoop-ha/{nn,dn,jn}
helm install hadoop-ha ./hadoop -n hadoop-ha --create-namespace
Check the pods and services:
kubectl get pods,svc -n hadoop-ha -owide
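Before opening the web UIs, it can help to wait until the StatefulSets have finished rolling out; for example (the StatefulSet names below follow the <release>-hadoop-<component> pattern visible in the pod list):

kubectl -n hadoop-ha rollout status statefulset/hadoop-ha-hadoop-hdfs-jn
kubectl -n hadoop-ha rollout status statefulset/hadoop-ha-hadoop-hdfs-nn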
HDFS Web UI (nn1):
http://192.168.182.110:31870/dfshealth.html#tab-overview
HDFS Web UI (nn2):
http://192.168.182.110:31871/dfshealth.html#tab-overview
YARN Web UI (rm1):
YARN Web UI (rm2):
4) Test and verify
kubectl exec -it hadoop-ha-hadoop-hdfs-nn-0 -n hadoop-ha -- bash
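From inside the NameNode pod, the HA state and a basic read/write path can be verified with the standard Hadoop CLI tools; a few examples (the exact output depends on your cluster):

hdfs haadmin -getAllServiceState     # one NameNode should report "active", the other "standby"
hdfs dfsadmin -report                # DataNodes registered with the active NameNode
yarn rmadmin -getAllServiceState     # ResourceManager HA state
# simple smoke test against the HA nameservice
hdfs dfs -mkdir -p /tmp/ha-test
hdfs dfs -put /etc/hosts /tmp/ha-test/
hdfs dfs -ls /tmp/ha-test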
5) Uninstall
helm uninstall hadoop-ha -n hadoop-ha
# force-delete any pods that are stuck terminating
kubectl delete pod -n hadoop-ha `kubectl get pod -n hadoop-ha|awk 'NR>1{print $1}'` --force
# if the namespace hangs in Terminating, clear its finalizers and force-delete it
kubectl patch ns hadoop-ha -p '{"metadata":{"finalizers":null}}'
kubectl delete ns hadoop-ha --force
# clean up the local storage directories on each node
rm -fr /opt/bigdata/servers/hadoop-ha/{nn,dn,jn}/data/data{1..3}/*
Git download link: