Advanced Scheduling
- Three mechanisms influence where a pod is scheduled:
- Node selectors: nodeSelector, nodeName
- Node affinity: nodeAffinity (plus pod affinity/anti-affinity)
- Taints and tolerations: taints, tolerations (covered at the end of this article)
Reference blogs:
https://blog.csdn.net/weixin_36171533/article/details/82757713
https://blog.csdn.net/redenval/article/details/82992706
https://www.jianshu.com/p/61725f179223
https://www.cnblogs.com/cocowool/p/kubernetes_affinity.html
Node Selectors
nodeSelector
[root@master schedule]# cat pod-demo.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-demo
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
  nodeSelector:
    desktype: ssd   # label selector: only nodes labeled desktype=ssd are candidates
Give node2 the matching label:
kubectl label nodes node2 desktype=ssd
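To confirm the label is in place and that the pod actually lands on node2, a minimal check (following the example above):
kubectl get nodes --show-labels | grep desktype   # verify node2 now carries desktype=ssd
kubectl apply -f pod-demo.yaml                    # create the pod
kubectl get pods -o wide                          # the NODE column should show node2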
nodeName
apiVersion: apps/v1beta2
kind: Deployment
metadata:
  name: consul
  namespace: vitamin
spec:
  replicas: 3
  selector:
    matchLabels:
      workload.user.cattle.io/workloadselector: vitamin-consul
  template:
    metadata:
      labels:
        workload.user.cattle.io/workloadselector: vitamin-consul
    spec:
      nodeName: 172.16.0.78   # bypasses the scheduler entirely: pods are bound directly to this node
      containers:
      - name: consul
        image: consul:1.3.1
        imagePullPolicy: IfNotPresent
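nodeName must exactly match a node's registered name (here a node registered under its IP). A quick way to list the valid values:
kubectl get nodes -o wide   # the NAME column holds the values nodeName may take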
resources (memory, CPU)
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: prometheus
  namespace: kube-system
spec:
  serviceName: "prometheus"
  replicas: 1
  podManagementPolicy: "Parallel"
  updateStrategy:
    type: "RollingUpdate"
  selector:
    matchLabels:
      k8s-app: prometheus
  template:
    metadata:
      labels:
        k8s-app: prometheus
    spec:
      containers:
      - name: prometheus-server
        image: private.winchannel.net/google_containers/prometheus:v2.2.1
        resources:
          limits:       # hard ceiling, enforced at runtime
            cpu: 2000m
            memory: 8000Mi
          requests:     # what the scheduler reserves when placing the pod
            cpu: 100m
            memory: 100Mi
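The scheduler only considers nodes whose unreserved capacity covers the requests; limits are enforced later at runtime by the kubelet and container runtime. To see how much of a node is already reserved, a minimal check:
kubectl describe node node01 | grep -A 5 "Allocated resources"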
Affinity Scheduling (Affinity)
[root@master ~]# kubectl explain pods.spec.affinity
KIND: Pod
VERSION: v1
RESOURCE: affinity <Object>
DESCRIPTION:
     If specified, the pod's scheduling constraints
     Affinity is a group of affinity scheduling rules.
FIELDS:
   nodeAffinity <Object>      # node affinity
     Describes node affinity scheduling rules for the pod.
   podAffinity <Object>       # pod affinity
     Describes pod affinity scheduling rules (e.g. co-locate this pod in the
     same node, zone, etc. as some other pod(s)).
   podAntiAffinity <Object>   # pod anti-affinity
     Describes pod anti-affinity scheduling rules (e.g. avoid putting this pod
     in the same node, zone, etc. as some other pod(s)).
Node Affinity
[root@master ~]# kubectl explain pods.spec.affinity.nodeAffinity | grep "<.*>"
RESOURCE: nodeAffinity <Object>
   preferredDuringSchedulingIgnoredDuringExecution <[]Object>   # soft affinity: best effort; the pod is scheduled even if nothing matches
   requiredDuringSchedulingIgnoredDuringExecution <Object>      # hard affinity: the pod is only scheduled onto a matching node
Hard affinity
requiredDuringSchedulingIgnoredDuringExecution: the pod is placed only on a node that carries a matching label (here, judged by a zone label).
apiVersion: v1
kind: Pod
metadata:
  name: pod-demo
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: zone        # schedulable only on nodes whose zone label value is in the list below
            operator: In
            values:
            - foo
            - bar
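If no node is labeled zone=foo or zone=bar, the pod stays Pending; labeling any node unblocks it (node02 here is just an example):
kubectl label nodes node02 zone=foo
kubectl get pods -o wide   # pod-demo should now be scheduled onto node02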
Soft affinity
preferredDuringSchedulingIgnoredDuringExecution: the preference guides scoring, but the pod is scheduled even if no node satisfies it.
apiVersion: v1
kind: Pod
metadata:
  name: pod-demo
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
    imagePullPolicy: IfNotPresent
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - preference:
          matchExpressions:
          - key: zone
            operator: In
            values:
            - foo
            - bar
        weight: 60   # 1-100; higher weight makes this preference count more in scoring
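With several preference terms, a node's score is the sum of the weights of the terms it matches, so weight tunes how strongly each preference counts. Applying this manifest on a cluster with no zone labels still schedules the pod (file name assumed for illustration):
kubectl apply -f pod-demo-preferred.yaml
kubectl get pods -o wide   # the pod runs even though no node matches zone In (foo,bar)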
Pod Affinity
Unlike node affinity, pod affinity does not match pods to node labels directly; it places a pod relative to other pods, based on a notion of "location".
If each node's hostname defines a location, then obviously every node is a distinct location.
Alternatively, a shared node label can define the location: nodes with the same label value count as the same location. This is the yardstick the scheduler uses to decide which placements satisfy the affinity (and the other scheduling properties).
[root@master affinity]# kubectl explain pods.spec.affinity. | grep podA
   podAffinity  <Object>
   podAntiAffinity      <Object>
[root@master ~]# kubectl explain pods.spec.affinity.podAffinity | grep "<.*>"
RESOURCE: podAffinity <Object>
   preferredDuringSchedulingIgnoredDuringExecution <[]Object>   # soft affinity: best effort; scheduled even if not satisfiable
   requiredDuringSchedulingIgnoredDuringExecution <[]Object>    # hard affinity: must be satisfied before scheduling
Affinity scheduling
Given two pods: the first is scheduled to node A; if the second declares affinity for the first, it will be scheduled to the same node, or to a node carrying the same topology label value.
Define the first pod
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: busybox
    imagePullPolicy: IfNotPresent
    command: ["/bin/sh","-c","sleep 3600"]
Define the second pod (podAffinity)
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: db
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox
    imagePullPolicy: IfNotPresent
    command: ["/bin/sh","-c","sleep 3600"]
  affinity:
    podAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:   # hard pod affinity
      - labelSelector:
          matchExpressions:                             # which pods to co-locate with
          - {key: app, operator: In, values: ["myapp"]} # must run near a pod labeled app=myapp
        namespaces: ["default"]                         # namespaces to search; defaults to this pod's namespace
        topologyKey: kubernetes.io/hostname             # node label that defines "the same location"
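topologyKey: kubernetes.io/hostname makes every node its own location, so "same location" means "same node". For coarser co-location the nodes could carry a custom label and the term point at it (a sketch; the rack label and values are hypothetical):
kubectl label nodes node01 rack=r1   # group nodes into racks
kubectl label nodes node02 rack=r1
# then set topologyKey: rack in the affinity term; any node in the same rack satisfies it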
Check the result
# Both pods landed on the same node, because the topologyKey is kubernetes.io/hostname
[root@master affinity]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-first 1/1 Running 0 103s 10.244.2.183 node02 <none> <none>
pod-second 1/1 Running 0 103s 10.244.2.182 node02 <none> <none>
Pod Anti-Affinity
The rule is inverted: the two pods must not share the same value of the topology label, so they are forced into different locations.
Define the first pod
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: busybox
    imagePullPolicy: IfNotPresent
    command: ["/bin/sh","-c","sleep 3600"]
Define the second pod (podAntiAffinity)
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: db
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox
    imagePullPolicy: IfNotPresent
    command: ["/bin/sh","-c","sleep 3600"]
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:   # hard anti-affinity
      - labelSelector:
          matchExpressions:                             # which pods to stay away from
          - {key: app, operator: In, values: ["myapp"]} # must NOT run near a pod labeled app=myapp
        namespaces: ["default"]                         # namespaces to search; defaults to this pod's namespace
        topologyKey: kubernetes.io/hostname             # node label that defines "the same location"
Check the result
# This time the two pods are forced onto different nodes
[root@master affinity]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-first 0/1 ContainerCreating 0 2s <none> node02 <none> <none>
pod-second 0/1 ContainerCreating 0 2s <none> node01 <none> <none>
Taints and Tolerations (Taint)
A taint's effect defines how pods are repelled:
NoSchedule: affects scheduling only; pods already running on the node are left alone
NoExecute: affects both scheduling and running pods; pods that do not tolerate the taint are evicted (see the sketch after this list)
PreferNoSchedule: avoid scheduling onto the tainted node if possible; if no other node fits, the pod may still be placed there
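A quick way to observe NoExecute's eviction behavior (a sketch; the node and key names are examples):
kubectl taint nodes node01 maintenance=true:NoExecute   # running pods without a matching toleration are evicted
kubectl get pods -o wide --watch                        # watch them terminate and reschedule elsewhere
kubectl taint nodes node01 maintenance:NoExecute-       # remove the taint afterwards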
Setting and removing taints
# set a taint
kubectl taint nodes node1 key1=value1:NoSchedule
# remove the taint
kubectl taint nodes node1 key1:NoSchedule-
# on kubeadm-deployed clusters, this taint is what keeps the master out of normal scheduling
kubectl taint nodes <node-name> node-role.kubernetes.io/master=:NoSchedule    # set the taint
kubectl taint nodes <node-name> node-role.kubernetes.io/master:NoSchedule-    # remove the taint
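The taints currently on a node can be inspected with:
kubectl describe node node1 | grep Taints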
Tolerations
A taint with effect NoSchedule, PreferNoSchedule, or NoExecute repels pods from the node to varying degrees. A pod, however, can declare a toleration, meaning it accepts the taint and may be scheduled onto the tainted node (tolerating a taint allows placement there but does not force it).
Tolerations are declared in the pod's spec under the tolerations field:
tolerations:
- key: "key1"
  operator: "Equal"
  value: "value1"
  effect: "NoSchedule"
- key: "key1"
  operator: "Equal"
  value: "value1"
  effect: "NoExecute"
  tolerationSeconds: 3600
- key: "key2"
  operator: "Exists"
  effect: "NoSchedule"
- key, value, and effect must match the taint set on the Node
- operator: Exists ignores value; operator: Equal requires the values to match exactly
- tolerationSeconds: for NoExecute taints, how long an already-running pod may keep running on the node before being evicted
Two special cases when setting tolerations on a Pod
Example 1: omitting key tolerates every taint key:
tolerations:
- operator: "Exists"
Example 2: omitting effect tolerates every effect of the given key:
tolerations:
- key: "key"
  operator: "Exists"
Taints and tolerations in practice
Create the taints
To simulate real environments, taint node01 for production use and node02 for development use:
[root@master affinity]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master Ready master 24d v1.13.4
node01 Ready <none> 24d v1.13.4
node02 Ready <none> 24d v1.13.4
[root@master affinity]# kubectl taint nodes node01 node-type=production:NoSchedule
node/node01 tainted
[root@master affinity]# kubectl taint nodes node02 node-type=developer:NoSchedule
node/node02 tainted
Tolerate a taint
Create a pod meant for the production environment and give it a toleration for the production taint:
[root@master affinity]# vim pod-demo-tolerations.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-demo-tain
  namespace: default
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: busybox
    imagePullPolicy: IfNotPresent
    command: ["/bin/sh","-c","sleep 3600"]
  tolerations:
  - key: "node-type"
    operator: "Equal"
    value: "production"
    effect: "NoSchedule"
[root@master affinity]# kubectl apply -f pod-demo-tolerations.yaml
pod/pod-demo-tain created
[root@master affinity]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod-demo-tain 1/1 Running 0 3s 10.244.1.173 node01 <none> <none>
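As expected the pod lands on node01. A pod without the toleration would have no node to go to (both workers are tainted, and a kubeadm master carries its own NoSchedule taint), so it would sit in Pending. This can be verified by dropping the tolerations block and re-applying (a sketch):
kubectl delete pod pod-demo-tain
# edit pod-demo-tolerations.yaml and remove the tolerations section, then:
kubectl apply -f pod-demo-tolerations.yaml
kubectl get pods   # STATUS stays Pending: no untainted node is available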