Creating a Highly Available Cluster with kubeadm
1 Lab Environment
Three masters (the etcd cluster also runs on all three masters as containers)
Full network connectivity between all machines
One machine can SSH to every other node
kubeadm and kubelet are installed on every machine
1.1 Topology
The topology is as follows:
  etcd/master        etcd/master        etcd/master
  kube-node1         kube-node2         kube-node3
  192.168.3.101      192.168.3.102      192.168.3.103
       |                  |                  |
       ------------------------------------------
                          |
                     192.168.3.1
                         lvs
               VIP 192.168.124.99:6443
                 DomainName: master
                          |
                       client
1.2 Configure ansible
Install ansible on kube-node1 and verify that it works
# Install the ansible package on kube-node1
yum install -y ansible
# Generate an SSH key
ssh-keygen
# Use ssh-copy-id to copy the key to the other nodes; alternatively, append the contents of /root/.ssh/id_rsa.pub from kube-node1 to /root/.ssh/authorized_keys on each node
ssh-copy-id root@kube-node1
ssh-copy-id root@kube-node2
ssh-copy-id root@kube-node3
# Generate the ansible hosts file
cat >> /etc/ansible/hosts << EOF
[etcd]
kube-node1 IP=192.168.3.101
kube-node2 IP=192.168.3.102
kube-node3 IP=192.168.3.103
EOF
ansible etcd -m shell -a 'echo {{ IP }}'
kube-node3 | SUCCESS | rc=0 >>
192.168.3.103
kube-node2 | SUCCESS | rc=0 >>
192.168.3.102
kube-node1 | SUCCESS | rc=0 >>
192.168.3.101
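As an additional connectivity check (a minimal sketch using ansible's built-in ping module), confirm that every host in the etcd group is reachable over SSH before continuing:
# Every host should answer with "pong"
ansible etcd -m ping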
1.3 Change the kubelet and docker cgroup driver on each master node
# Add the "exec-opts" option below to /etc/docker/daemon.json:
{
  "registry-mirrors": ["https://ytr4padx.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
systemctl restart docker
systemctl status docker
# Update the kubelet environment flags and restart the kubelet service
sed -i 's/cgroupfs/systemd/g' /var/lib/kubelet/kubeadm-flags.env
systemctl restart kubelet
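To confirm both daemons now use the systemd cgroup driver (a quick check, assuming the paths above):
# docker should report "Cgroup Driver: systemd"
docker info 2>/dev/null | grep -i 'cgroup driver'
# the kubelet flags file should contain --cgroup-driver=systemd
grep cgroup-driver /var/lib/kubelet/kubeadm-flags.env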
2 Creating the etcd Cluster
2.1 Generate the kubeadm configuration files
# Set the environment variables HOST0, HOST1 and HOST2
mkdir -p /root/etcd-cluster
cd /root/etcd-cluster
export HOST0=192.168.3.101
export HOST1=192.168.3.102
export HOST2=192.168.3.103
# Create temporary directories to hold the configuration files
mkdir -p /root/etcd-cluster/${HOST0}/ /root/etcd-cluster/${HOST1}/ /root/etcd-cluster/${HOST2}/
ETCDHOSTS=(${HOST0} ${HOST1} ${HOST2})
NAMES=("infra0" "infra1" "infra2")
# Generate the configuration files
for i in "${!ETCDHOSTS[@]}"; do
HOST=${ETCDHOSTS[$i]}
NAME=${NAMES[$i]}
cat > /root/etcd-cluster/${HOST}/kubeadm-etcd.yaml << EOF
apiVersion: "kubeadm.k8s.io/v1beta1"
kind: ClusterConfiguration
etcd:
  local:
    serverCertSANs:
    - "${HOST0}"
    - "${HOST1}"
    - "${HOST2}"
    peerCertSANs:
    - "${HOST}"
    extraArgs:
      initial-cluster: ${NAMES[0]}=https://${ETCDHOSTS[0]}:2380,${NAMES[1]}=https://${ETCDHOSTS[1]}:2380,${NAMES[2]}=https://${ETCDHOSTS[2]}:2380
      initial-cluster-state: new
      name: ${NAME}
      listen-peer-urls: https://${HOST}:2380
      listen-client-urls: https://${HOST}:2379
      advertise-client-urls: https://${HOST}:2379
      initial-advertise-peer-urls: https://${HOST}:2380
EOF
done
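Before distributing the files, it is worth confirming that each per-host directory received a config with the expected member name and peer URL (a minimal sanity check):
# Each file should show a different name (infra0/infra1/infra2) and its own host IP
grep -E 'name:|listen-peer-urls:' /root/etcd-cluster/*/kubeadm-etcd.yaml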
2.2 Generate the etcd certificates from the configuration files
Generate a CA certificate for etcd with a 20-year validity. You could also generate the CA with kubeadm init phase certs etcd-ca, but that CA is only valid for 10 years.
# Generate the CA certificate
cd /root/etcd-cluster/
openssl genrsa -out ca.key 2048
openssl req -new -key ca.key -out ca.csr -set_serial 0 -subj "/CN=etcd-ca"
openssl x509 -req -days 7300 -in ca.csr -signkey ca.key -out /root/etcd-cluster/ca.crt -extfile /etc/pki/tls/openssl.cnf -extensions v3_req -extensions v3_ca
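You can verify the subject and the 20-year validity period of the CA you just signed, for example:
# Shows the CN and the notBefore/notAfter dates
openssl x509 -noout -subject -dates -in /root/etcd-cluster/ca.crt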
Create the directories /etc/kubernetes/pki/etcd/ and /etc/kubernetes/etcd/ on every machine to hold the certificates and the configuration file
ansible etcd -m shell -a 'mkdir -p /etc/kubernetes/pki/etcd/'
ansible etcd -m shell -a 'mkdir -p /etc/kubernetes/etcd/'
ansible etcd -m copy -a 'src=/root/etcd-cluster/ca.key dest=/etc/kubernetes/pki/etcd/ca.key'
ansible etcd -m copy -a 'src=/root/etcd-cluster/ca.crt dest=/etc/kubernetes/pki/etcd/ca.crt'
ansible etcd -m copy -a 'src=/root/etcd-cluster/{{IP}}/kubeadm-etcd.yaml dest=/etc/kubernetes/etcd/kubeadm-etcd.yaml'
Create the certificates for each cluster member
# Create the certificate generation script
cat > gen-cert.sh << EOF
kubeadm init phase certs etcd-server --config=/etc/kubernetes/etcd/kubeadm-etcd.yaml
kubeadm init phase certs etcd-peer --config=/etc/kubernetes/etcd/kubeadm-etcd.yaml
kubeadm init phase certs etcd-healthcheck-client --config=/etc/kubernetes/etcd/kubeadm-etcd.yaml
kubeadm init phase certs apiserver-etcd-client --config=/etc/kubernetes/etcd/kubeadm-etcd.yaml
EOF
# Copy the script to every node and run it
ansible etcd -m copy -a 'src=/root/etcd-cluster/gen-cert.sh dest=/root/gen-cert.sh'
ansible etcd -m shell -a 'sh /root/gen-cert.sh'
Confirm that the certificates were generated successfully
ansible etcd -m shell -a ' tree /etc/kubernetes/'
kube-node1 | SUCCESS | rc=0 >>
/etc/kubernetes/
├── etcd
│ └── kubeadm-etcd.yaml
├── manifests
└── pki
├── apiserver-etcd-client.crt
├── apiserver-etcd-client.key
└── etcd
├── ca.crt
├── ca.key
├── healthcheck-client.crt
├── healthcheck-client.key
├── peer.crt
├── peer.key
├── server.crt
└── server.key
4 directories, 11 files
kube-node3 | SUCCESS | rc=0 >>
/etc/kubernetes/
├── etcd
│ └── kubeadm-etcd.yaml
├── manifests
└── pki
├── apiserver-etcd-client.crt
├── apiserver-etcd-client.key
└── etcd
├── ca.crt
├── ca.key
├── healthcheck-client.crt
├── healthcheck-client.key
├── peer.crt
├── peer.key
├── server.crt
└── server.key
4 directories, 11 files
kube-node2 | SUCCESS | rc=0 >>
/etc/kubernetes/
├── etcd
│ └── kubeadm-etcd.yaml
├── manifests
└── pki
├── apiserver-etcd-client.crt
├── apiserver-etcd-client.key
└── etcd
├── ca.crt
├── ca.key
├── healthcheck-client.crt
├── healthcheck-client.key
├── peer.crt
├── peer.key
├── server.crt
└── server.key
4 directories, 11 files
ansible etcd -m shell -a 'openssl x509 -text -in /etc/kubernetes/pki/etcd/server.crt| grep DNS'
kube-node3 | SUCCESS | rc=0 >>
DNS:kube-node3, DNS:localhost, IP Address:10.0.2.15, IP Address:127.0.0.1, IP Address:0:0:0:0:0:0:0:1, IP Address:192.168.3.101, IP Address:192.168.3.102, IP Address:192.168.3.103
kube-node2 | SUCCESS | rc=0 >>
DNS:kube-node2, DNS:localhost, IP Address:10.0.2.15, IP Address:127.0.0.1, IP Address:0:0:0:0:0:0:0:1, IP Address:192.168.3.101, IP Address:192.168.3.102, IP Address:192.168.3.103
kube-node1 | SUCCESS | rc=0 >>
DNS:kube-node1, DNS:localhost, IP Address:10.0.2.15, IP Address:127.0.0.1, IP Address:0:0:0:0:0:0:0:1, IP Address:192.168.3.101, IP Address:192.168.3.102, IP Address:192.168.3.103
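To see when each node's etcd server certificate expires (kubeadm-issued leaf certificates are typically valid for one year, independent of the CA's lifetime), a quick check across all nodes is:
ansible etcd -m shell -a 'openssl x509 -noout -enddate -in /etc/kubernetes/pki/etcd/server.crt'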
2.3 Start etcd locally using the certificates generated above
Generate the containerized startup scripts
cat > etcd0.sh << 'EOF'
docker run -d --net host \
-v /etc/kubernetes:/etc/kubernetes -v /var/lib/etcd:/var/lib/etcd \
k8s.gcr.io/etcd:3.3.10 \
etcd \
--cert-file=/etc/kubernetes/pki/etcd/server.crt \
--client-cert-auth=true \
--data-dir=/var/lib/etcd \
--initial-cluster=infra0=https://192.168.3.101:2380,infra1=https://192.168.3.102:2380,infra2=https://192.168.3.103:2380 \
--initial-cluster-state=new \
--key-file=/etc/kubernetes/pki/etcd/server.key \
--peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt \
--peer-client-cert-auth=true \
--peer-key-file=/etc/kubernetes/pki/etcd/peer.key \
--peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--snapshot-count=10000 \
--trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--name=infra0 \
--advertise-client-urls=https://192.168.3.101:2379 \
--initial-advertise-peer-urls=https://192.168.3.101:2380 \
--listen-client-urls=https://192.168.3.101:2379 \
--listen-peer-urls=https://192.168.3.101:2380
EOF
cat > etcd1.sh << 'EOF'
docker run -d --net host \
-v /etc/kubernetes:/etc/kubernetes -v /var/lib/etcd:/var/lib/etcd \
k8s.gcr.io/etcd:3.3.10 \
etcd \
--cert-file=/etc/kubernetes/pki/etcd/server.crt \
--client-cert-auth=true \
--data-dir=/var/lib/etcd \
--initial-cluster=infra0=https://192.168.3.101:2380,infra1=https://192.168.3.102:2380,infra2=https://192.168.3.103:2380 \
--initial-cluster-state=new \
--key-file=/etc/kubernetes/pki/etcd/server.key \
--peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt \
--peer-client-cert-auth=true \
--peer-key-file=/etc/kubernetes/pki/etcd/peer.key \
--peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--snapshot-count=10000 \
--trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--name=infra1 \
--advertise-client-urls=https://192.168.3.102:2379 \
--initial-advertise-peer-urls=https://192.168.3.102:2380 \
--listen-client-urls=https://192.168.3.102:2379 \
--listen-peer-urls=https://192.168.3.102:2380
EOF
cat > etcd2.sh << 'EOF'
docker run -d --net host \
-v /etc/kubernetes:/etc/kubernetes -v /var/lib/etcd:/var/lib/etcd \
k8s.gcr.io/etcd:3.3.10 \
etcd \
--cert-file=/etc/kubernetes/pki/etcd/server.crt \
--client-cert-auth=true \
--data-dir=/var/lib/etcd \
--initial-cluster=infra0=https://192.168.3.101:2380,infra1=https://192.168.3.102:2380,infra2=https://192.168.3.103:2380 \
--initial-cluster-state=new \
--key-file=/etc/kubernetes/pki/etcd/server.key \
--peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt \
--peer-client-cert-auth=true \
--peer-key-file=/etc/kubernetes/pki/etcd/peer.key \
--peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--snapshot-count=10000 \
--trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt \
--name=infra2 \
--advertise-client-urls=https://192.168.3.103:2379 \
--initial-advertise-peer-urls=https://192.168.3.103:2380 \
--listen-client-urls=https://192.168.3.103:2379 \
--listen-peer-urls=https://192.168.3.103:2380
EOF
Copy the scripts to the corresponding master nodes and run them
ansible kube-node1 -m copy -a 'src=/root/etcd-cluster/etcd0.sh dest=/root/etcd.sh'
ansible kube-node2 -m copy -a 'src=/root/etcd-cluster/etcd1.sh dest=/root/etcd.sh'
ansible kube-node3 -m copy -a 'src=/root/etcd-cluster/etcd2.sh dest=/root/etcd.sh'
ansible etcd -m shell -a 'docker kill $(docker ps -aq)'
ansible etcd -m shell -a 'docker container prune -f'
ansible etcd -m shell -a 'sh /root/etcd.sh'
2.4 Confirm the cluster status
On 192.168.3.101, check whether the etcd cluster is healthy
# Define an etcdctl3 alias for convenience
alias etcdctl3='docker run --rm -it \
--net host -e ETCDCTL_API=3 \
-v /etc/kubernetes:/etc/kubernetes k8s.gcr.io/etcd:3.3.10 etcdctl \
--cert /etc/kubernetes/pki/etcd/peer.crt \
--key /etc/kubernetes/pki/etcd/peer.key \
--cacert /etc/kubernetes/pki/etcd/ca.crt \
--endpoints https://192.168.3.101:2379,https://192.168.3.102:2379,https://192.168.3.103:2379'
# Check the cluster status
etcdctl3 endpoint status
https://192.168.3.101:2379, ad8e97d25b4a7a33, 3.3.10, 5.3 MB, false, 991, 184683
https://192.168.3.102:2379, d04a75fb135f1017, 3.3.10, 5.3 MB, true, 991, 184683
https://192.168.3.103:2379, 5f2ea51de51c1bd4, 3.3.10, 5.3 MB, false, 991, 184683
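Besides endpoint status, a quick health probe of every member can be done with the same alias:
# Each endpoint should report "is healthy"
etcdctl3 endpoint health
# List the cluster members and their peer/client URLs
etcdctl3 member list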
3 Creating the Master Cluster
3.1 Create the kubeadm-master.yaml configuration file
Create the kubeadm-master.yaml configuration file on kube-node1
mkdir -p /root/master/
cat >> /etc/hosts << EOF
192.168.3.101 master
EOF
cat > /root/master/kubeadm-master.yaml <<EOF
apiVersion: kubeadm.k8s.io/v1beta1
kind: ClusterConfiguration
kubernetesVersion: v1.14.1
controlPlaneEndpoint: "master:6443"
apiServer:
  certSANs:
  - "master"
  extraArgs:
    advertise-address: "192.168.3.101"
networking:
  podSubnet: 10.244.0.0/16
etcd:
  external:
    endpoints:
    - https://192.168.3.101:2379
    - https://192.168.3.102:2379
    - https://192.168.3.103:2379
    caFile: /etc/kubernetes/pki/etcd/ca.crt
    certFile: /etc/kubernetes/pki/apiserver-etcd-client.crt
    keyFile: /etc/kubernetes/pki/apiserver-etcd-client.key
EOF
3.2 Initialize the first control-plane master
Initialize the cluster
kubeadm init --config=/root/master/kubeadm-master.yaml --experimental-upload-certs
# The --experimental-upload-certs flag uploads the certificates shared by all control-plane instances to the cluster. If you prefer to copy the certificates across control-plane nodes manually or with your own automation, remove this flag and follow the manual certificate distribution procedure instead.
# Configure the kubectl environment
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
cat >> ~/.bash_profile << EOF
source <(kubectl completion bash)
EOF
source ~/.bash_profile
# View the cluster configuration
kubectl -n kube-system get configmap kubeadm-config -o yaml
# View the cluster kubelet configuration
kubectl -n kube-system get configmap kubelet-config-1.14 -o yaml
# View the certificates shared on the cluster
kubectl -n kube-system get secrets kubeadm-certs -o yaml
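At this point it is also worth confirming that the control-plane static pods (kube-apiserver, kube-controller-manager, kube-scheduler) are running on kube-node1:
# All kube-system pods except coredns should be Running (coredns waits for the network plugin)
kubectl -n kube-system get pods -o wide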
Join the cluster with kubeadm on nodes kube-node2 and kube-node3
cat >> /etc/hosts << EOF
192.168.3.101 master
EOF
kubeadm join master:6443 --token uwm1iq.bydvqhg91670w58g \
--discovery-token-ca-cert-hash sha256:fef3eba9bc05450a0e3c705239775bf5297a82ecf4faf683a7cbb3a38b433ca0 \
--experimental-control-plane --certificate-key cdad981734e547e60133a044ac72a60f6021038c5c3ca4d25a886e0ce79f9b57 \
--ignore-preflight-errors=all
# --ignore-preflight-errors=all ignores preflight errors and can be omitted
The shared certificates uploaded by default are only valid for 2 hours; re-upload them when necessary
kubeadm init phase upload-certs --experimental-upload-certs
Use the following command to join other (worker) nodes to the cluster
kubeadm join master:6443 --token 74fxrx.34uwnbxun31d9cgc \
--discovery-token-ca-cert-hash sha256:7056fa528a5f0f962f565ee4eb7a96d7ecc5432883bd3c506ca0fa1eab09064c
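Join tokens also expire (24 hours by default). If the token above is no longer valid, a fresh worker join command can be printed with:
# Creates a new token and prints the matching kubeadm join command
kubeadm token create --print-join-command
For an additional control-plane node, append --experimental-control-plane and the --certificate-key printed by the upload-certs command shown earlier.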
On kube-node2 and kube-node3, change the master address in /etc/hosts to the node's own IP
ansible etcd -m shell -a "sed -i '/master/d' /etc/hosts"
ansible etcd -m shell -a 'echo {{IP}} master >> /etc/hosts'
ansible etcd -m shell -a 'cat /etc/hosts | grep master'
kube-node2 | SUCCESS | rc=0 >>
192.168.3.102 master
kube-node1 | SUCCESS | rc=0 >>
192.168.3.101 master
kube-node3 | SUCCESS | rc=0 >>
192.168.3.103 master
4 Adding the Network Plugin
# Download the network plugin YAML
cd /root/master/
wget https://docs.projectcalico.org/v3.6/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml
# Replace the pod CIDR
sed -i s/192\.168\.0\.0/10.244.0.0/g calico.yaml
# Create the objects
kubectl apply -f calico.yaml
# Check node status
kubectl get nodes
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 4h1m v1.14.1
kube-node2 Ready master 27m v1.14.1
kube-node3 Ready master 22m v1.14.1
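Nodes only become Ready once the calico pods are up; you can watch the plugin roll out with something like (the k8s-app=calico-node label is the one used in the calico v3.6 manifest):
# One calico-node pod per node should reach Running
kubectl -n kube-system get pods -l k8s-app=calico-node -o wide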
5 Creating an External Load Balancer for kube-apiserver
Configure LVS as the external load balancer
# Create the VIP service 192.168.124.99:6443
ipvsadm --add-service -t 192.168.124.99:6443
# Add real servers 192.168.3.101/102/103:6443 to the VIP service in NAT (masquerading) mode
ipvsadm --add-server -t 192.168.124.99:6443 -r 192.168.3.101:6443 --masquerading
ipvsadm --add-server -t 192.168.124.99:6443 -r 192.168.3.102:6443 --masquerading
ipvsadm --add-server -t 192.168.124.99:6443 -r 192.168.3.103:6443 --masquerading
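Note that ipvsadm rules live only in the kernel and are lost on reboot. On a CentOS-style host they can be saved and restored, for example (the /etc/sysconfig/ipvsadm path is an assumption matching the stock ipvsadm service):
# Dump the current rules in numeric form so they can be restored later
ipvsadm-save -n > /etc/sysconfig/ipvsadm
# Restore them after a reboot
ipvsadm-restore < /etc/sysconfig/ipvsadm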
Edit /etc/hosts on the client
cat >> /etc/hosts << EOF
192.168.124.99 master
EOF
Copy the contents of /etc/kubernetes/admin.conf on kube-node1 into $HOME/.kube/config on the client
Configure the kubectl environment on the client
mkdir -p $HOME/.kube
chown $(id -u):$(id -g) $HOME/.kube/config
cat >> ~/.bash_profile << EOF
source <(kubectl completion bash)
EOF
source ~/.bash_profile
Confirm from the client that load balancing works
for i in {1..100}; do kubectl get nodes; done
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 23h v1.14.1
kube-node2 Ready master 20h v1.14.1
kube-node3 Ready master 20h v1.14.1
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 23h v1.14.1
kube-node2 Ready master 20h v1.14.1
kube-node3 Ready master 20h v1.14.1
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 23h v1.14.1
kube-node2 Ready master 20h v1.14.1
kube-node3 Ready master 20h v1.14.1
Confirm on the load balancer that requests are being distributed
ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.124.99:6443 wlc
-> 192.168.3.101:6443 Masq 1 0 33
-> 192.168.3.102:6443 Masq 1 0 33
-> 192.168.3.103:6443 Masq 1 0 34
6 Failure Testing
Stop etcd on kube-node1
ps -ef | grep 'etcd.*new' | grep -v grep | awk '{print $2}' | xargs kill
Check the etcd cluster status
etcdctl3 endpoint status
Failed to get the status of endpoint https://192.168.3.101:2379 (context deadline exceeded)
https://192.168.3.102:2379, d04a75fb135f1017, 3.3.10, 5.3 MB, true, 991, 184337
https://192.168.3.103:2379, 5f2ea51de51c1bd4, 3.3.10, 5.3 MB, false, 991, 184337
Check the Kubernetes cluster status
ansible etcd -m shell -a 'kubectl get nodes'
kube-node1 | SUCCESS | rc=0 >>
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 22h v1.14.1
kube-node2 Ready master 19h v1.14.1
kube-node3 Ready master 18h v1.14.1
kube-node2 | SUCCESS | rc=0 >>
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 22h v1.14.1
kube-node2 Ready master 19h v1.14.1
kube-node3 Ready master 18h v1.14.1
kube-node3 | SUCCESS | rc=0 >>
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 22h v1.14.1
kube-node2 Ready master 19h v1.14.1
kube-node3 Ready master 18h v1.14.1
Check the cluster status from the client
kubectl get nodes
NAME STATUS ROLES AGE VERSION
kube-node1 Ready master 22h v1.14.1
kube-node2 Ready master 19h v1.14.1
kube-node3 Ready master 19h v1.14.1
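To bring the failed member back, the same startup script can simply be re-run on kube-node1; because the data directory /var/lib/etcd still exists, etcd ignores the --initial-cluster* flags and rejoins as the existing member (a sketch, assuming the container from section 2.3 was the only one stopped):
# Remove the stopped container and start etcd again on kube-node1
ansible kube-node1 -m shell -a 'docker container prune -f && sh /root/etcd.sh'
# All three endpoints should report status again
etcdctl3 endpoint status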
7 Test Application
Remove the taints from the masters
kubectl taint node kube-node1 node-role.kubernetes.io/master-
kubectl taint node kube-node2 node-role.kubernetes.io/master-
kubectl taint node kube-node3 node-role.kubernetes.io/master-
Create a test DaemonSet manifest
cat > DaemonSetforTest.yaml << EOF
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: testpod
  labels:
    app: fortest
spec:
  template:
    metadata:
      labels:
        app: fortest
    spec:
      containers:
      - name: busybox
        image: busybox:v1
        command:
        - /bin/sh
        - -c
        - 'sleep 365d'
        imagePullPolicy: IfNotPresent
EOF
Create the resource and check that it was created
kubectl apply -f DaemonSetforTest.yaml
kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
testpod-b6v5n 1/1 Running 0 41s 10.244.233.194 kube-node2 <none> <none>
testpod-gztwp 1/1 Running 0 46s 10.244.9.66 kube-node1 <none> <none>
testpod-lfzwb 1/1 Running 0 36s 10.244.119.130 kube-node3 <none> <none>
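When the test is done, the DaemonSet can be removed and, if desired, the master taints restored (the taint key/effect below is the one kubeadm applies by default):
kubectl delete -f DaemonSetforTest.yaml
# Re-apply the NoSchedule taint on each master
kubectl taint node kube-node1 node-role.kubernetes.io/master=:NoSchedule
kubectl taint node kube-node2 node-role.kubernetes.io/master=:NoSchedule
kubectl taint node kube-node3 node-role.kubernetes.io/master=:NoSchedule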
8 Error Recovery
# Clean up the configuration
ansible etcd -m shell -a 'kubeadm reset -f'
ansible etcd -m shell -a 'iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X'
# Stop etcd on each node
ps -ef | grep 'etcd.*new' | grep -v grep | awk '{print $2}' | xargs kill
# Regenerate the certificates
ansible etcd -m shell -a 'mkdir -p /etc/kubernetes/pki/etcd/'
ansible etcd -m copy -a 'src=/root/etcd-cluster/ca.key dest=/etc/kubernetes/pki/etcd/ca.key'
ansible etcd -m copy -a 'src=/root/etcd-cluster/ca.crt dest=/etc/kubernetes/pki/etcd/ca.crt'
ansible etcd -m shell -a 'sh /root/gen-cert.sh'
# Start the etcd cluster
ansible etcd -m shell -a 'sh /root/etcd.sh'
# Check the etcd endpoint status
etcdctl3 endpoint status
# Clear the etcd data
etcdctl3 del --prefix /registry
# Re-initialize the cluster; see section 3.2
9 Modifying the apiserver Configuration
On the second master, edit /etc/kubernetes/manifests/kube-apiserver.yaml:
spec:
  containers:
  - command:
    - kube-apiserver
    - --advertise-address=22.22.3.235
Confirm that the kubernetes endpoints include the addresses of both masters
$ kubectl get ep kubernetes
NAME ENDPOINTS AGE
kubernetes 22.22.3.234:6443,22.22.3.235:6443 53d
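Since /etc/kubernetes/manifests/kube-apiserver.yaml is a static pod manifest, editing it in place is enough; the kubelet restarts the apiserver automatically. A one-liner for the second master (the 22.22.3.235 address is just this example's) could be:
sed -i 's/--advertise-address=.*/--advertise-address=22.22.3.235/' /etc/kubernetes/manifests/kube-apiserver.yaml
# Wait for the kubelet to restart the static pod, then re-check the endpoints
kubectl get ep kubernetes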